You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@joshua.apache.org by mj...@apache.org on 2016/05/03 05:07:38 UTC

[1/6] incubator-joshua git commit: Clean up some code smells detected by findbugs (our static code analyzer)

Repository: incubator-joshua
Updated Branches:
  refs/heads/master cf5fbb5ac -> 2c02feafe


Clean up some code smells detected by findbugs (our static code analyzer)


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/7fd3cfcb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/7fd3cfcb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/7fd3cfcb

Branch: refs/heads/master
Commit: 7fd3cfcbbd12f07d0b4eb58ef02c39f4af7ef7e3
Parents: cf5fbb5
Author: Pavel Danchenko <da...@amazon.com>
Authored: Mon Dec 14 10:34:16 2015 +0100
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Mon May 2 12:20:53 2016 -0700

----------------------------------------------------------------------
 src/joshua/corpus/Span.java                     | 10 ++++-----
 src/joshua/decoder/BLEU.java                    | 13 ++++++------
 src/joshua/decoder/Decoder.java                 |  4 +++-
 src/joshua/decoder/JoshuaConfiguration.java     |  2 +-
 src/joshua/decoder/chart_parser/DotChart.java   | 10 +++++----
 src/joshua/decoder/ff/FeatureVector.java        | 16 +++++++-------
 src/joshua/decoder/ff/LabelCombinationFF.java   |  8 +++----
 src/joshua/decoder/ff/RuleShape.java            |  8 +++----
 src/joshua/decoder/ff/fragmentlm/Tree.java      | 22 +++++++++-----------
 .../ff/lm/StateMinimizingLanguageModel.java     |  4 +++-
 .../ff/similarity/EdgePhraseSimilarityFF.java   |  8 ++++---
 .../decoder/ff/tm/SentenceFilteredGrammar.java  |  4 +---
 .../GrammarBuilderWalkerFunction.java           |  5 -----
 src/joshua/decoder/hypergraph/HyperGraph.java   | 14 +++++--------
 .../decoder/hypergraph/KBestExtractor.java      |  6 +++---
 .../decoder/io/TranslationRequestStream.java    |  2 +-
 src/joshua/decoder/segment_file/Sentence.java   | 15 ++++++-------
 src/joshua/lattice/Lattice.java                 |  6 +++---
 src/joshua/metrics/BLEU.java                    |  9 ++++----
 src/joshua/metrics/EvaluationMetric.java        |  6 +++---
 src/joshua/metrics/MinimumChangeBLEU.java       | 11 +++++-----
 src/joshua/metrics/Precis.java                  |  9 ++++----
 src/joshua/util/JoshuaEval.java                 |  5 ++---
 src/joshua/util/Lists.java                      |  3 +++
 .../util/encoding/FeatureTypeAnalyzer.java      | 14 ++++---------
 25 files changed, 102 insertions(+), 112 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/corpus/Span.java
----------------------------------------------------------------------
diff --git a/src/joshua/corpus/Span.java b/src/joshua/corpus/Span.java
index d26d5ea..a51a9d2 100644
--- a/src/joshua/corpus/Span.java
+++ b/src/joshua/corpus/Span.java
@@ -21,6 +21,7 @@ package joshua.corpus;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
+import java.util.NoSuchElementException;
 
 /**
  * Represents a span with an inclusive starting index and an exclusive ending index.
@@ -113,14 +114,13 @@ public class Span implements Iterable<Integer>, Comparable<Span> {
       int next = start;
 
       public boolean hasNext() {
-        if (next < end) {
-          return true;
-        } else {
-          return false;
-        }
+        return next < end;
       }
 
       public Integer next() {
+        if (!hasNext()) {
+          throw new NoSuchElementException();
+        }
         return next++;
       }
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/decoder/BLEU.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/BLEU.java b/src/joshua/decoder/BLEU.java
index 129b792..1b3e3f8 100644
--- a/src/joshua/decoder/BLEU.java
+++ b/src/joshua/decoder/BLEU.java
@@ -145,9 +145,10 @@ public class BLEU {
     float resBleu = 0.0f;
 
     int[] numNgramMatch = new int[bleuOrder];
-    for (String ngram : hypNgramTbl.keySet()) {// each ngram in hyp
+    for (Map.Entry<String, Integer> entry : hypNgramTbl.entrySet()) {// each ngram in hyp
+      String ngram = entry.getKey();
       if (maxRefCountTbl.containsKey(ngram)) {
-        int hypNgramCount = hypNgramTbl.get(ngram);
+        int hypNgramCount = entry.getValue();
 
         int effectiveNumMatch = hypNgramCount;
 
@@ -187,14 +188,14 @@ public class BLEU {
     float resBleu = 0;
 
     int[] numNgramMatch = new int[bleuOrder];
-    for (Iterator<String> it = hypNgramTbl.keySet().iterator(); it.hasNext();) {
-      String ngram = it.next();
+    for (Map.Entry<String, Integer> entry : hypNgramTbl.entrySet()) {
+      String ngram = entry.getKey();
       if (refNgramTbl.containsKey(ngram)) {
         if (doNgramClip) {
           numNgramMatch[Regex.spaces.split(ngram).length - 1] += Support.findMin(
-              refNgramTbl.get(ngram), hypNgramTbl.get(ngram)); // ngram clip
+              refNgramTbl.get(ngram), entry.getValue()); // ngram clip
         } else {
-          numNgramMatch[Regex.spaces.split(ngram).length - 1] += hypNgramTbl.get(ngram);// without
+          numNgramMatch[Regex.spaces.split(ngram).length - 1] += entry.getValue();// without
                                                                                         // ngram
                                                                                         // count
                                                                                         // clipping

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/Decoder.java b/src/joshua/decoder/Decoder.java
index aab6d36..0057f87 100644
--- a/src/joshua/decoder/Decoder.java
+++ b/src/joshua/decoder/Decoder.java
@@ -35,6 +35,8 @@ import java.util.List;
 import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.BlockingQueue;
 
+import com.google.common.base.Strings;
+
 import joshua.corpus.Vocabulary;
 import joshua.decoder.ff.FeatureVector;
 import joshua.decoder.JoshuaConfiguration.INPUT_TYPE;
@@ -639,7 +641,7 @@ public class Decoder {
       
       
       /* Add command-line-passed weights to the weights array for processing below */
-      if (joshuaConfiguration.weight_overwrite != "") {
+      if (!Strings.isNullOrEmpty(joshuaConfiguration.weight_overwrite)) {
         String[] tokens = joshuaConfiguration.weight_overwrite.split("\\s+");
         for (int i = 0; i < tokens.length; i += 2) {
           String feature = tokens[i];

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/decoder/JoshuaConfiguration.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/JoshuaConfiguration.java b/src/joshua/decoder/JoshuaConfiguration.java
index a825268..7a3de23 100644
--- a/src/joshua/decoder/JoshuaConfiguration.java
+++ b/src/joshua/decoder/JoshuaConfiguration.java
@@ -538,7 +538,7 @@ public class JoshuaConfiguration {
             logger.finest(String.format("mark_oovs: %s", mark_oovs));
 
           } else if (parameter.equals(normalize_key("pop-limit"))) {
-            pop_limit = Integer.valueOf(fds[1]);
+            pop_limit = Integer.parseInt(fds[1]);
             logger.finest(String.format("pop-limit: %s", pop_limit));
 
           } else if (parameter.equals(normalize_key("input-type"))) {

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/decoder/chart_parser/DotChart.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/chart_parser/DotChart.java b/src/joshua/decoder/chart_parser/DotChart.java
index 64972d5..b82b68c 100644
--- a/src/joshua/decoder/chart_parser/DotChart.java
+++ b/src/joshua/decoder/chart_parser/DotChart.java
@@ -22,6 +22,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
@@ -302,18 +303,19 @@ class DotChart {
    */
 
   private ArrayList<Trie> matchAll(DotNode dotNode, int wordID) {
-    ArrayList<Trie> trieList = new ArrayList<Trie>();
+    ArrayList<Trie> trieList = new ArrayList<>();
     HashMap<Integer, ? extends Trie> childrenTbl = dotNode.trieNode.getChildren();
 
     if (childrenTbl != null && wordID >= 0) {
       // get all the extensions, map to string, check for *, build regexp
-      for (Integer arcID : childrenTbl.keySet()) {
+      for (Map.Entry<Integer, ? extends Trie> entry : childrenTbl.entrySet()) {
+        Integer arcID = entry.getKey();
         if (arcID == wordID) {
-          trieList.add(childrenTbl.get(arcID));
+          trieList.add(entry.getValue());
         } else {
           String arcWord = Vocabulary.word(arcID);
           if (Vocabulary.word(wordID).matches(arcWord)) {
-            trieList.add(childrenTbl.get(arcID));
+            trieList.add(entry.getValue());
           }
         }
       }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/decoder/ff/FeatureVector.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/FeatureVector.java b/src/joshua/decoder/ff/FeatureVector.java
index f95a93c..dcbcda2 100644
--- a/src/joshua/decoder/ff/FeatureVector.java
+++ b/src/joshua/decoder/ff/FeatureVector.java
@@ -315,13 +315,13 @@ public class FeatureVector {
    * to be compatible with their tuners.
    */
   public String mosesString() {
-    String outputString = "";
+    StringBuilder outputString = new StringBuilder();
     
     HashSet<String> printed_keys = new HashSet<String>();
     
     // First print all the dense feature names in order
     for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
-      outputString += String.format("%s=%.3f ", DENSE_FEATURE_NAMES.get(i).replaceAll("_", "-"), getDense(i));
+      outputString.append(String.format("%s=%.3f ", DENSE_FEATURE_NAMES.get(i).replaceAll("_", "-"), getDense(i)));
       printed_keys.add(DENSE_FEATURE_NAMES.get(i));
     }
     
@@ -334,10 +334,10 @@ public class FeatureVector {
         if (key.equals("OOVPenalty"))
           // force moses to see it as sparse
           key = "OOV_Penalty";
-        outputString += String.format("%s=%.3f ", key, value);
+        outputString.append(String.format("%s=%.3f ", key, value));
       }
     }
-    return outputString.trim();
+    return outputString.toString().trim();
   }
     
   /***
@@ -346,13 +346,13 @@ public class FeatureVector {
    */
   @Override
   public String toString() {
-    String outputString = "";
+    StringBuilder outputString = new StringBuilder();
     
     HashSet<String> printed_keys = new HashSet<String>();
     
     // First print all the dense feature names in order
     for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
-      outputString += String.format("%s=%.3f ", DENSE_FEATURE_NAMES.get(i), getDense(i));
+      outputString.append(String.format("%s=%.3f ", DENSE_FEATURE_NAMES.get(i), getDense(i)));
       printed_keys.add(DENSE_FEATURE_NAMES.get(i));
     }
     
@@ -361,8 +361,8 @@ public class FeatureVector {
     Collections.sort(keys);
     for (String key: keys)
       if (! printed_keys.contains(key))
-        outputString += String.format("%s=%.3f ", key, sparseFeatures.get(key));
+        outputString.append(String.format("%s=%.3f ", key, sparseFeatures.get(key)));
 
-    return outputString.trim();
+    return outputString.toString().trim();
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/decoder/ff/LabelCombinationFF.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/LabelCombinationFF.java b/src/joshua/decoder/ff/LabelCombinationFF.java
index 37e83f9..38a85db 100644
--- a/src/joshua/decoder/ff/LabelCombinationFF.java
+++ b/src/joshua/decoder/ff/LabelCombinationFF.java
@@ -42,13 +42,13 @@ public class LabelCombinationFF extends StatelessFF {
   }
 
   private final String computeRuleLabelCombinationDescriptor(Rule rule) {
-    String result = getLowerCasedFeatureName() + "_";
-    result += RulePropertiesQuerying.getLHSAsString(rule);
+    StringBuilder result = new StringBuilder(getLowerCasedFeatureName() + "_");
+    result.append(RulePropertiesQuerying.getLHSAsString(rule));
     // System.out.println("Rule: " + rule);
     for (String foreignNonterminalString : RulePropertiesQuerying.getRuleSourceNonterminalStrings(rule)) {
-      result += "_" + foreignNonterminalString;
+      result.append("_").append(foreignNonterminalString);
     }
-    return result;
+    return result.toString();
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/decoder/ff/RuleShape.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/RuleShape.java b/src/joshua/decoder/ff/RuleShape.java
index ff022ef..e243528 100644
--- a/src/joshua/decoder/ff/RuleShape.java
+++ b/src/joshua/decoder/ff/RuleShape.java
@@ -43,20 +43,20 @@ public class RuleShape extends StatelessFF {
   }
   
   private String pattern(int[] ids) {
-    String pattern = "";
+    StringBuilder pattern = new StringBuilder();
     int curtype = gettype(ids[0]);
     int curcount = 1;
     for (int i = 1; i < ids.length; i++) {
       if (gettype(ids[i]) != curtype) {
-        pattern += String.format("%s%s_", curtype < 0 ? "N" : "x", curcount > 1 ? "+" : "");
+        pattern.append(String.format("%s%s_", curtype < 0 ? "N" : "x", curcount > 1 ? "+" : ""));
         curtype = gettype(ids[i]);
         curcount = 1;
       } else {
         curcount++;
       }
     }
-    pattern += String.format("%s%s_", curtype < 0 ? "N" : "x", curcount > 1 ? "+" : "");
-    return pattern;
+    pattern.append(String.format("%s%s_", curtype < 0 ? "N" : "x", curcount > 1 ? "+" : ""));
+    return pattern.toString();
   }
   
   @Override

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/decoder/ff/fragmentlm/Tree.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/fragmentlm/Tree.java b/src/joshua/decoder/ff/fragmentlm/Tree.java
index a4aa5a8..b52ccce 100644
--- a/src/joshua/decoder/ff/fragmentlm/Tree.java
+++ b/src/joshua/decoder/ff/fragmentlm/Tree.java
@@ -115,14 +115,14 @@ public class Tree implements Serializable {
    * @return
    */
   public String getRule() {
-    String ruleString = null;
-    if (!isLeaf()) {
-      ruleString = "(" + Vocabulary.word(getLabel());
-      for (Tree child : getChildren())
-        ruleString += " " + Vocabulary.word(child.getLabel());
+    if (isLeaf()) {
+      return null;
     }
-
-    return ruleString;
+    StringBuilder ruleString = new StringBuilder("(" + Vocabulary.word(getLabel()));
+    for (Tree child : getChildren()) {
+      ruleString.append(" ").append(Vocabulary.word(child.getLabel()));
+    }
+    return ruleString.toString();
   }
 
   /*
@@ -537,7 +537,7 @@ public class Tree implements Serializable {
    * 
    * @param rule
    * @param tailNodes
-   * @param derivation
+   * @param derivation - should not be null
    * @param maxDepth
    * @return
    */
@@ -551,10 +551,8 @@ public class Tree implements Serializable {
     tree = tree.shallowClone();
     
     System.err.println(String.format("buildTree(%s)", tree));
-    if (derivationStates != null) {
-      for (int i = 0; i < derivationStates.length; i++) {
-        System.err.println(String.format("  -> %d: %s", i, derivationStates[i]));
-      }
+    for (int i = 0; i < derivationStates.length; i++) {
+      System.err.println(String.format("  -> %d: %s", i, derivationStates[i]));
     }
 
     List<Tree> frontier = tree.getNonterminalYield();

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java b/src/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
index 5e406de..f07b668 100644
--- a/src/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
+++ b/src/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
@@ -144,8 +144,10 @@ public class StateMinimizingLanguageModel extends LanguageModelFF {
     }
     
     int sentID = sentence.id();
-    if (!poolMap.containsKey(sentID))
+    // Since sentID is unique across threads, the check-then-put below is safe even though it is not atomic.
+    if (!poolMap.containsKey(sentID)) {
       poolMap.put(sentID, KenLM.createPool());
+    }
 
     // Get the probability of applying the rule and the new state
     StateProbPair pair = ((KenLM) languageModel).probRule(words, poolMap.get(sentID));

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java b/src/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java
index 22c3733..3497001 100644
--- a/src/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java
+++ b/src/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java
@@ -25,8 +25,11 @@ import java.io.PrintWriter;
 import java.net.Socket;
 import java.net.UnknownHostException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 
+import com.google.common.base.Throwables;
+
 import joshua.corpus.Vocabulary;
 import joshua.decoder.JoshuaConfiguration;
 import joshua.decoder.chart_parser.SourcePath;
@@ -195,8 +198,7 @@ public class EdgePhraseSimilarityFF extends StatefulFF implements SourceDependen
     try {
       return new EdgePhraseSimilarityFF(this.weights, args, config);
     } catch (Exception e) {
-      e.printStackTrace();
-      return null;
+      throw Throwables.propagate(e);
     }
   }
 
@@ -229,7 +231,7 @@ public class EdgePhraseSimilarityFF extends StatefulFF implements SourceDependen
       int[] source = batch.get(i);
       int[] target = batch.get(i + 1);
 
-      if (source.equals(target)) {
+      if (Arrays.equals(source, target)) {
         similarity += 1;
         count++;
       } else {

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/SentenceFilteredGrammar.java b/src/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
index 694430b..d540727 100644
--- a/src/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
+++ b/src/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
@@ -56,10 +56,8 @@ public class SentenceFilteredGrammar extends MemoryBasedBatchGrammar {
     int origCount = getNumRules(baseGrammar.getTrieRoot());
     long startTime = System.currentTimeMillis();
 
-    /* Filter the rules */
+    /* Filter the rules; returns non-null object */
     this.filteredTrie = filter(baseGrammar.getTrieRoot());
-    if (filteredTrie == null)
-      filteredTrie = new SentenceFilteredTrie(baseGrammar.getTrieRoot());
     int filteredCount = getNumRules();
 
     float seconds = (System.currentTimeMillis() - startTime) / 1000.0f;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java b/src/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
index 5a3b422..12e79c5 100644
--- a/src/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
+++ b/src/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
@@ -90,11 +90,6 @@ public class GrammarBuilderWalkerFunction implements WalkerFunction {
 
   private Rule getRuleWithSpans(HyperEdge edge, HGNode head) {
     Rule edgeRule = edge.getRule();
-    // System.err.printf("EdgeRule: %s\n", edgeRule);
-    if (!(edgeRule instanceof Rule)) {
-      // System.err.println("edge rule is not a bilingual rule");
-      return null;
-    }
     int headLabel = getLabelWithSpan(head);
     // System.err.printf("Head label: %s\n", headLabel);
     // if (edge.getAntNodes() != null) {

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/decoder/hypergraph/HyperGraph.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/hypergraph/HyperGraph.java b/src/joshua/decoder/hypergraph/HyperGraph.java
index 2d1cfb0..003c930 100644
--- a/src/joshua/decoder/hypergraph/HyperGraph.java
+++ b/src/joshua/decoder/hypergraph/HyperGraph.java
@@ -144,19 +144,15 @@ public class HyperGraph {
    * @param fileName
    */
   public void dump(String fileName, List<FeatureFunction> model) {
-    PrintWriter out = null;
-    try {
-      out = new PrintWriter(fileName, "UTF-8");
+    try ( PrintWriter out = new PrintWriter(fileName, "UTF-8") ) {
+      count();
+      out.println("# target ||| features");
+      out.println(String.format("%d %d", numNodes, numEdges));
+      new ForestWalker(TRAVERSAL.POSTORDER).walk(this.goalNode, new HyperGraphDumper(out, model));
     } catch (IOException e) {
       System.err.println("* Can't dump hypergraph to file '" + fileName + "'");
       e.printStackTrace();
     }
-    
-    count();
-    out.println("# target ||| features");
-    out.println(String.format("%d %d", numNodes, numEdges));
-    new ForestWalker(TRAVERSAL.POSTORDER).walk(this.goalNode, new HyperGraphDumper(out, model));
-    out.close();
   }
 
   public float bestScore() {

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/decoder/hypergraph/KBestExtractor.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/hypergraph/KBestExtractor.java b/src/joshua/decoder/hypergraph/KBestExtractor.java
index 98f6f15..6dd3207 100644
--- a/src/joshua/decoder/hypergraph/KBestExtractor.java
+++ b/src/joshua/decoder/hypergraph/KBestExtractor.java
@@ -912,12 +912,12 @@ public class KBestExtractor {
      * @return
      */
     private String quoteTerminals(String words) {
-      String quotedWords = "";
+      StringBuilder quotedWords = new StringBuilder();
       for (String word: words.split("\\s+"))
         if (word.startsWith("[") && word.endsWith("]"))
-          quotedWords += String.format("%s ", word);
+          quotedWords.append(String.format("%s ", word));
         else
-        quotedWords += String.format("\"%s\" ", word);
+        quotedWords.append(String.format("\"%s\" ", word));
 
       return quotedWords.substring(0, quotedWords.length() - 1);
     }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/decoder/io/TranslationRequestStream.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/io/TranslationRequestStream.java b/src/joshua/decoder/io/TranslationRequestStream.java
index 8f1c754..47f5d81 100644
--- a/src/joshua/decoder/io/TranslationRequestStream.java
+++ b/src/joshua/decoder/io/TranslationRequestStream.java
@@ -58,7 +58,7 @@ public class TranslationRequestStream {
   private StreamHandler requestHandler = null;
 
   /* Whether the request has been killed by a broken client connection. */
-  private boolean isShutDown = false;
+  private volatile boolean isShutDown = false;
 
   public TranslationRequestStream(BufferedReader reader, JoshuaConfiguration joshuaConfiguration) {
     this.joshuaConfiguration = joshuaConfiguration;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/decoder/segment_file/Sentence.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/segment_file/Sentence.java b/src/joshua/decoder/segment_file/Sentence.java
index 985a9fe..588850b 100644
--- a/src/joshua/decoder/segment_file/Sentence.java
+++ b/src/joshua/decoder/segment_file/Sentence.java
@@ -303,11 +303,12 @@ public class Sentence {
    * @return
    */
   public String source() {
-    String str = "";
+    StringBuilder str = new StringBuilder();
     int[] ids = getWordIDs();
-    for (int i = 1; i < ids.length - 1; i++)
-      str += Vocabulary.word(ids[i]) + " ";
-    return str.trim();
+    for (int i = 1; i < ids.length - 1; i++) {
+      str.append(Vocabulary.word(ids[i])).append(" ");
+    }
+    return str.toString().trim();
   }
 
   /**
@@ -347,16 +348,16 @@ public class Sentence {
   public String source(int i, int j) {
     StringTokenizer st = new StringTokenizer(fullSource());
     int index = 0;
-    String substring = "";
+    StringBuilder substring = new StringBuilder();
     while (st.hasMoreTokens()) {
       String token = st.nextToken();
       if (index >= j)
         break;
       if (index >= i)
-        substring += token + " ";
+        substring.append(token).append(" ");
       index++;
     }
-    return substring.trim();
+    return substring.toString().trim();
   }
 
   public String[] references() {

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/lattice/Lattice.java
----------------------------------------------------------------------
diff --git a/src/joshua/lattice/Lattice.java b/src/joshua/lattice/Lattice.java
index bf2bf87..b0ef40f 100644
--- a/src/joshua/lattice/Lattice.java
+++ b/src/joshua/lattice/Lattice.java
@@ -199,8 +199,8 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
       while (arcMatcher.matches()) {
         numArcs++;
         String arcLabel = arcMatcher.group(1);
-        float arcWeight = Float.valueOf(arcMatcher.group(2));
-        int destinationNodeID = nodeID + Integer.valueOf(arcMatcher.group(3));
+        float arcWeight = Float.parseFloat(arcMatcher.group(2));
+        int destinationNodeID = nodeID + Integer.parseInt(arcMatcher.group(3));
 
         Node<Token> destinationNode;
         if (destinationNodeID < nodes.size() && nodes.get(destinationNodeID) != null) {
@@ -279,7 +279,7 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
       while (arcMatcher.matches()) {
         String arcLabel = arcMatcher.group(1);
         float arcWeight = Float.valueOf(arcMatcher.group(2));
-        int destinationNodeID = nodeID + Integer.valueOf(arcMatcher.group(3));
+        int destinationNodeID = nodeID + Integer.parseInt(arcMatcher.group(3));
 
         Node<String> destinationNode;
         if (nodes.containsKey(destinationNodeID)) {

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/metrics/BLEU.java
----------------------------------------------------------------------
diff --git a/src/joshua/metrics/BLEU.java b/src/joshua/metrics/BLEU.java
index 8f3a92a..95c6cee 100644
--- a/src/joshua/metrics/BLEU.java
+++ b/src/joshua/metrics/BLEU.java
@@ -20,6 +20,7 @@ package joshua.metrics;
 
 import java.util.HashMap;
 import java.util.Iterator;
+import java.util.Map;
 import java.util.logging.Logger;
 
 public class BLEU extends EvaluationMetric {
@@ -115,11 +116,9 @@ public class BLEU extends EvaluationMetric {
       // ...and update as necessary from the other reference translations
       for (int r = 1; r < refsPerSen; ++r) {
         HashMap<String, Integer> nextNgramCounts = getNgramCountsAll(refSentences[i][r]);
-        Iterator<String> it = (nextNgramCounts.keySet()).iterator();
-
-        while (it.hasNext()) {
-          gram = it.next();
-          nextCount = nextNgramCounts.get(gram);
+        for (Map.Entry<String, Integer> entry : nextNgramCounts.entrySet()) { 
+          gram = entry.getKey();
+          nextCount = entry.getValue();
 
           if (maxNgramCounts[i].containsKey(gram)) { // update if necessary
             oldCount = maxNgramCounts[i].get(gram);

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/metrics/EvaluationMetric.java
----------------------------------------------------------------------
diff --git a/src/joshua/metrics/EvaluationMetric.java b/src/joshua/metrics/EvaluationMetric.java
index f22bf47..4dd9fbd 100644
--- a/src/joshua/metrics/EvaluationMetric.java
+++ b/src/joshua/metrics/EvaluationMetric.java
@@ -284,12 +284,12 @@ public abstract class EvaluationMetric {
 
         int[][] SS = suffStats(cand_strings, cand_indices);
         for (int d = 0; d < size; ++d) {
-          String stats_str = "";
+          StringBuilder stats_str = new StringBuilder();
 
           for (int s = 0; s < suffStatsCount - 1; ++s) {
-            stats_str += SS[d][s] + " ";
+            stats_str.append(SS[d][s]).append(" ");
           }
-          stats_str += SS[d][suffStatsCount - 1];
+          stats_str.append(SS[d][suffStatsCount - 1]);
 
           outFile.println(stats_str);
         }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/metrics/MinimumChangeBLEU.java
----------------------------------------------------------------------
diff --git a/src/joshua/metrics/MinimumChangeBLEU.java b/src/joshua/metrics/MinimumChangeBLEU.java
index 17f78ee..fa764c3 100644
--- a/src/joshua/metrics/MinimumChangeBLEU.java
+++ b/src/joshua/metrics/MinimumChangeBLEU.java
@@ -20,6 +20,7 @@ package joshua.metrics;
 
 import java.util.HashMap;
 import java.util.Iterator;
+import java.util.Map;
 import java.util.logging.Logger;
 
 import joshua.util.Algorithms;
@@ -77,11 +78,9 @@ public class MinimumChangeBLEU extends BLEU {
           maxNgramCounts[i] = getNgramCountsAll(refSentences[i][r]);
         } else {
           HashMap<String, Integer> nextNgramCounts = getNgramCountsAll(refSentences[i][r]);
-          Iterator<String> it = (nextNgramCounts.keySet()).iterator();
-
-          while (it.hasNext()) {
-            gram = it.next();
-            nextCount = nextNgramCounts.get(gram);
+          for (Map.Entry<String, Integer> entry : nextNgramCounts.entrySet()) {
+            gram = entry.getKey();
+            nextCount = entry.getValue();
 
             if (maxNgramCounts[i].containsKey(gram)) {
               oldCount = maxNgramCounts[i].get(gram);
@@ -214,7 +213,7 @@ public class MinimumChangeBLEU extends BLEU {
 
   public void printDetailedScore_fromStats(int[] stats, boolean oneLiner) {
     double wer = stats[suffStatsCount - 1] / stats[suffStatsCount - 3];
-    double wer_penalty = (wer >= thresholdWER) ? 1.0 : (wer / thresholdWER);
+    double wer_penalty = (wer >= thresholdWER) ? 1.0d : (wer / thresholdWER);
 
     System.out.println("WER_penalty = " + wer_penalty);
     System.out.println("MC_BLEU= " + score(stats));

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/metrics/Precis.java
----------------------------------------------------------------------
diff --git a/src/joshua/metrics/Precis.java b/src/joshua/metrics/Precis.java
index 865c6cf..82f4106 100644
--- a/src/joshua/metrics/Precis.java
+++ b/src/joshua/metrics/Precis.java
@@ -20,6 +20,7 @@ package joshua.metrics;
 
 import java.util.HashMap;
 import java.util.Iterator;
+import java.util.Map;
 import java.util.logging.Logger;
 
 import joshua.util.Algorithms;
@@ -122,11 +123,9 @@ public class Precis extends BLEU {
           maxNgramCounts[i] = getNgramCountsAll(refSentences[i][r]);
         } else {
           HashMap<String, Integer> nextNgramCounts = getNgramCountsAll(refSentences[i][r]);
-          Iterator<String> it = (nextNgramCounts.keySet()).iterator();
-
-          while (it.hasNext()) {
-            gram = it.next();
-            nextCount = nextNgramCounts.get(gram);
+          for ( Map.Entry<String, Integer> entry : nextNgramCounts.entrySet() ) {
+            gram = entry.getKey();
+            nextCount = entry.getValue();
 
             if (maxNgramCounts[i].containsKey(gram)) {
               oldCount = maxNgramCounts[i].get(gram);

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/util/JoshuaEval.java
----------------------------------------------------------------------
diff --git a/src/joshua/util/JoshuaEval.java b/src/joshua/util/JoshuaEval.java
index 6e12de5..6c0761a 100644
--- a/src/joshua/util/JoshuaEval.java
+++ b/src/joshua/util/JoshuaEval.java
@@ -115,12 +115,11 @@ public class JoshuaEval {
     String[] topCand_str = new String[numSentences];
 
     // BUG: all of this needs to be replaced with the SegmentFileParser and related interfaces.
-    try {
+    try (InputStream inStream = new FileInputStream(new File(inFileName));
+        BufferedReader inFile = new BufferedReader(new InputStreamReader(inStream, "utf8"))) {
 
       // read the candidates
 
-      InputStream inStream = new FileInputStream(new File(inFileName));
-      BufferedReader inFile = new BufferedReader(new InputStreamReader(inStream, "utf8"));
       String line, candidate_str;
 
       if (inFileFormat.equals("plain")) {

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/util/Lists.java
----------------------------------------------------------------------
diff --git a/src/joshua/util/Lists.java b/src/joshua/util/Lists.java
index 8f51cc0..43ffa00 100644
--- a/src/joshua/util/Lists.java
+++ b/src/joshua/util/Lists.java
@@ -67,6 +67,9 @@ public class Lists {
           }
 
           public Integer next() {
+            if (!hasNext()) {
+              throw new NoSuchElementException();
+            }
             int result = next;
             next += 1;
             return result;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fd3cfcb/src/joshua/util/encoding/FeatureTypeAnalyzer.java
----------------------------------------------------------------------
diff --git a/src/joshua/util/encoding/FeatureTypeAnalyzer.java b/src/joshua/util/encoding/FeatureTypeAnalyzer.java
index c9b77e9..4a8861c 100644
--- a/src/joshua/util/encoding/FeatureTypeAnalyzer.java
+++ b/src/joshua/util/encoding/FeatureTypeAnalyzer.java
@@ -206,17 +206,11 @@ public class FeatureTypeAnalyzer {
     }
 
     FeatureType(String key) {
+      // either throws or returns non-null
       FloatEncoder e = EncoderFactory.getFloatEncoder(key);
-      if (e != null) {
-        encoder = e;
-        analyzer = null;
-        bits = -1;
-      } else if ("8bit".equals(key)) {
-        encoder = null;
-        analyzer = new Analyzer();
-        bits = 8;
-      } else
-        throw new RuntimeException("Unsupported encoder type: " + key);
+      encoder = e;
+      analyzer = null;
+      bits = -1;
     }
 
     void inferUncompressedType() {

[6/6] incubator-joshua git commit: Merge branch 'performance'

Posted by mj...@apache.org.

Merge branch 'performance'


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/2c02feaf
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/2c02feaf
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/2c02feaf

Branch: refs/heads/master
Commit: 2c02feafed5fdc2b9aed551d8d0e13ecc2a51c6e
Parents: cf5fbb5 fb5f720
Author: Matt Post <po...@cs.jhu.edu>
Authored: Mon May 2 23:07:28 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Mon May 2 23:07:28 2016 -0400

----------------------------------------------------------------------
 src/joshua/corpus/Span.java                     |  10 +-
 src/joshua/corpus/Vocabulary.java               | 188 ++++++++++---------
 src/joshua/decoder/BLEU.java                    |  13 +-
 src/joshua/decoder/Decoder.java                 |   4 +-
 src/joshua/decoder/JoshuaConfiguration.java     |   2 +-
 src/joshua/decoder/chart_parser/DotChart.java   |  10 +-
 src/joshua/decoder/ff/FeatureVector.java        |  16 +-
 src/joshua/decoder/ff/LabelCombinationFF.java   |   8 +-
 src/joshua/decoder/ff/RuleShape.java            |   8 +-
 src/joshua/decoder/ff/fragmentlm/Tree.java      |  22 +--
 .../ff/lm/StateMinimizingLanguageModel.java     |   4 +-
 .../ff/lm/berkeley_lm/LMGrammarBerkeley.java    |  49 +++--
 .../ff/similarity/EdgePhraseSimilarityFF.java   |   8 +-
 .../decoder/ff/tm/SentenceFilteredGrammar.java  |   4 +-
 .../GrammarBuilderWalkerFunction.java           |   5 -
 src/joshua/decoder/hypergraph/HyperGraph.java   |  14 +-
 .../decoder/hypergraph/KBestExtractor.java      |   6 +-
 .../decoder/io/TranslationRequestStream.java    |   2 +-
 src/joshua/decoder/segment_file/Sentence.java   |  15 +-
 src/joshua/lattice/Lattice.java                 |   6 +-
 src/joshua/metrics/BLEU.java                    |   9 +-
 src/joshua/metrics/EvaluationMetric.java        |   6 +-
 src/joshua/metrics/MinimumChangeBLEU.java       |  11 +-
 src/joshua/metrics/Precis.java                  |   9 +-
 src/joshua/util/JoshuaEval.java                 |   5 +-
 src/joshua/util/Lists.java                      |   3 +
 .../util/encoding/FeatureTypeAnalyzer.java      |  14 +-
 tst/joshua/corpus/VocabularyTest.java           | 118 ++++++++++++
 .../LMBerkeleySentenceProbablityTest.java       |  29 +++
 29 files changed, 377 insertions(+), 221 deletions(-)
----------------------------------------------------------------------

[3/6] incubator-joshua git commit: Reduce lock contention on Vocab by using optimistic stamped read/write locks. Drops lock contention time in the Vocab class from 7min14s to 0m0s during a training run of 50K

Posted by mj...@apache.org.

Reduce lock contention on Vocab by using optimistic stamped read/write locks.
Drops lock contention time in the Vocab class from 7min14s to 0m0s during a training run of 50K


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/cb700140
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/cb700140
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/cb700140

Branch: refs/heads/master
Commit: cb7001406da2f601dac51669d56648342c881b45
Parents: 39f59a8
Author: Kellen Sunderland <ke...@amazon.com>
Authored: Wed Jan 27 11:55:13 2016 +0100
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Mon May 2 12:27:39 2016 -0700

----------------------------------------------------------------------
 src/joshua/corpus/Vocabulary.java | 188 +++++++++++++++++++--------------
 1 file changed, 106 insertions(+), 82 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/cb700140/src/joshua/corpus/Vocabulary.java
----------------------------------------------------------------------
diff --git a/src/joshua/corpus/Vocabulary.java b/src/joshua/corpus/Vocabulary.java
index 1792219..ee59507 100644
--- a/src/joshua/corpus/Vocabulary.java
+++ b/src/joshua/corpus/Vocabulary.java
@@ -1,21 +1,3 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
 package joshua.corpus;
 
 import static joshua.util.FormatUtils.isNonterminal;
@@ -32,6 +14,7 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.locks.StampedLock;
 
 import joshua.decoder.Decoder;
 import joshua.decoder.ff.lm.NGramLanguageModel;
@@ -40,33 +23,32 @@ import joshua.util.FormatUtils;
 /**
  * Static singular vocabulary class.
  * Supports (de-)serialization into a vocabulary file.
- * 
+ *
  * @author Juri Ganitkevitch
  */
 
 public class Vocabulary {
 
-  private final static ArrayList<NGramLanguageModel> LMs = new ArrayList<NGramLanguageModel>();
+  private final static ArrayList<NGramLanguageModel> LMs = new ArrayList<>();
 
   private static List<String> idToString;
   private static Map<String, Integer> stringToId;
-  
+  private static final StampedLock lock = new StampedLock();
   private static volatile List<Integer> nonTerminalIndices;
 
-  private static final Integer lock = new Integer(0);
-
   static final int UNKNOWN_ID = 0;
   static final String UNKNOWN_WORD = "<unk>";
 
   public static final String START_SYM = "<s>";
   public static final String STOP_SYM = "</s>";
-  
+
   static {
     clear();
   }
 
   public static boolean registerLanguageModel(NGramLanguageModel lm) {
-    synchronized (lock) {
+    long lock_stamp = lock.writeLock();
+    try {
       // Store the language model.
       LMs.add(lm);
       // Notify it of all the existing words.
@@ -74,39 +56,40 @@ public class Vocabulary {
       for (int i = idToString.size() - 1; i > 0; i--)
         collision = collision || lm.registerWord(idToString.get(i), i);
       return collision;
+    } finally {
+      lock.unlockWrite(lock_stamp);
     }
   }
 
   /**
    * Reads a vocabulary from file. This deletes any additions to the vocabulary made prior to
    * reading the file.
-   * 
+   *
    * @param file_name
    * @return Returns true if vocabulary was read without mismatches or collisions.
    * @throws IOException
    */
   public static boolean read(final File vocab_file) throws IOException {
-    synchronized (lock) {
-      DataInputStream vocab_stream =
-          new DataInputStream(new BufferedInputStream(new FileInputStream(vocab_file)));
-      int size = vocab_stream.readInt();
-      Decoder.LOG(1, String.format("Read %d entries from the vocabulary", size));
-      clear();
-      for (int i = 0; i < size; i++) {
-        int id = vocab_stream.readInt();
-        String token = vocab_stream.readUTF();
-        if (id != Math.abs(id(token))) {
-          vocab_stream.close();
-          return false;
-        }
+    DataInputStream vocab_stream =
+        new DataInputStream(new BufferedInputStream(new FileInputStream(vocab_file)));
+    int size = vocab_stream.readInt();
+    Decoder.LOG(1, String.format("Read %d entries from the vocabulary", size));
+    clear();
+    for (int i = 0; i < size; i++) {
+      int id = vocab_stream.readInt();
+      String token = vocab_stream.readUTF();
+      if (id != Math.abs(id(token))) {
+        vocab_stream.close();
+        return false;
       }
-      vocab_stream.close();
-      return (size + 1 == idToString.size());
     }
+    vocab_stream.close();
+    return (size + 1 == idToString.size());
   }
 
   public static void write(String file_name) throws IOException {
-    synchronized (lock) {
+    long lock_stamp =lock.readLock();
+    try {
       File vocab_file = new File(file_name);
       DataOutputStream vocab_stream =
           new DataOutputStream(new BufferedOutputStream(new FileOutputStream(vocab_file)));
@@ -118,44 +101,81 @@ public class Vocabulary {
       }
       vocab_stream.close();
     }
+    finally{
+      lock.unlockRead(lock_stamp);
+    }
   }
 
   /**
    * Get the id of the token if it already exists, new id is created otherwise.
-   * 
-   * TODO: currently locks for every call.
-   * Separate constant (frozen) ids from changing (e.g. OOV) ids.
-   * Constant ids could be immutable -> no locking.
-   * Alternatively: could we use ConcurrentHashMap to not have to lock if actually contains it and only lock for modifications? 
+   *
+   * TODO: currently locks for every call. Separate constant (frozen) ids from
+   * changing (e.g. OOV) ids. Constant ids could be immutable -> no locking.
+   * Alternatively: could we use ConcurrentHashMap to not have to lock if
+   * actually contains it and only lock for modifications?
    */
   public static int id(String token) {
-    synchronized (lock) {
+    // First attempt an optimistic read
+    long attempt_read_lock = lock.tryOptimisticRead();
+    if (stringToId.containsKey(token)) {
+      int resultId = stringToId.get(token);
+      if (lock.validate(attempt_read_lock)) {
+        return resultId;
+      }
+    }
+else {
+        if (nonTerminalIndices != null && nt(token)) {
+          throw new IllegalArgumentException(
+              "After the nonterminal indices have been set by calling getNonterminalIndices you can't call id on new nonterminals anymore.");
+        }
+      }
+
+    // The optimistic read failed, try a read with a stamped read lock
+    long read_lock_stamp = lock.readLock();
+    try {
       if (stringToId.containsKey(token)) {
         return stringToId.get(token);
       } else {
         if (nonTerminalIndices != null && nt(token)) {
-          throw new IllegalArgumentException("After the nonterminal indices have been set by calling getNonterminalIndices you can't call id on new nonterminals anymore.");
+          throw new IllegalArgumentException(
+              "After the nonterminal indices have been set by calling getNonterminalIndices you can't call id on new nonterminals anymore.");
         }
-        int id = idToString.size() * (nt(token) ? -1 : 1);
-
-        // register this (token,id) mapping with each language
-        // model, so that they can map it to their own private
-        // vocabularies
-        for (NGramLanguageModel lm : LMs)
-          lm.registerWord(token, Math.abs(id));
+      }
+    } finally {
+      lock.unlockRead(read_lock_stamp);
+    }
 
-        idToString.add(token);
-        stringToId.put(token, id);
-        return id;
+    // Looks like the id we want is not there, let's get a write lock and add it
+    long write_lock_stamp = lock.writeLock();
+    try {
+      if (stringToId.containsKey(token)) {
+        return stringToId.get(token);
       }
+      int id = idToString.size() * (nt(token) ? -1 : 1);
+
+      // register this (token,id) mapping with each language
+      // model, so that they can map it to their own private
+      // vocabularies
+      for (NGramLanguageModel lm : LMs)
+        lm.registerWord(token, Math.abs(id));
+
+      idToString.add(token);
+      stringToId.put(token, id);
+      return id;
+    } finally {
+      lock.unlockWrite(write_lock_stamp);
     }
   }
 
   public static boolean hasId(int id) {
-    synchronized (lock) {
+    long lock_stamp = lock.readLock();
+    try {
       id = Math.abs(id);
       return (id < idToString.size());
     }
+    finally{
+      lock.unlockRead(lock_stamp);
+    }
   }
 
   public static int[] addAll(String sentence) {
@@ -170,10 +190,14 @@ public class Vocabulary {
   }
 
   public static String word(int id) {
-    synchronized (lock) {
+    long lock_stamp = lock.readLock();
+    try {
       id = Math.abs(id);
       return idToString.get(id);
     }
+    finally{
+      lock.unlockRead(lock_stamp);
+    }
   }
 
   public static String getWords(int[] ids) {
@@ -190,19 +214,15 @@ public class Vocabulary {
       sb.append(word(id)).append(" ");
     return sb.deleteCharAt(sb.length() - 1).toString();
   }
-  
+
   /**
    * This method returns a list of all (positive) indices
    * corresponding to Nonterminals in the Vocabulary.
    */
-  public static List<Integer> getNonterminalIndices()
+  public static synchronized List<Integer> getNonterminalIndices()
   {
     if (nonTerminalIndices == null) {
-      synchronized (lock) {
-        if (nonTerminalIndices == null) {
-          nonTerminalIndices = findNonTerminalIndices();
-        }
-      }
+      nonTerminalIndices = findNonTerminalIndices();
     }
     return nonTerminalIndices;
   }
@@ -211,7 +231,7 @@ public class Vocabulary {
    * Iterates over the Vocabulary and finds all non terminal indices.
    */
   private static List<Integer> findNonTerminalIndices() {
-    List<Integer> nonTerminalIndices = new ArrayList<Integer>();
+    List<Integer> nonTerminalIndices = new ArrayList<>();
     for(int i = 0; i < idToString.size(); i++) {
       final String word = idToString.get(i);
       if(isNonterminal(word)){
@@ -230,8 +250,8 @@ public class Vocabulary {
   }
 
   /**
-   * Returns true if the Vocabulary ID represents a nonterminal. 
-   * 
+   * Returns true if the Vocabulary ID represents a nonterminal.
+   *
    * @param id
    * @return
    */
@@ -244,33 +264,37 @@ public class Vocabulary {
   }
 
   public static int size() {
-    synchronized (lock) {
+    long lock_stamp = lock.readLock();
+    try {
       return idToString.size();
+    } finally {
+      lock.unlockRead(lock_stamp);
     }
   }
 
-  public static int getTargetNonterminalIndex(int id) {
+  public static synchronized int getTargetNonterminalIndex(int id) {
     return FormatUtils.getNonterminalIndex(word(id));
   }
 
   /**
-   * Clears the vocabulary and initializes it with an unknown word.
-   * Registered language models are left unchanged.
+   * Clears the vocabulary and initializes it with an unknown word. Registered
+   * language models are left unchanged.
    */
   public static void clear() {
-    synchronized (lock) {
-      nonTerminalIndices = null;
-
+    long lock_stamp = lock.writeLock();
+    try {
       idToString = new ArrayList<String>();
-      stringToId = new HashMap<String, Integer>();      
-  
+      stringToId = new HashMap<String, Integer>();
+
       idToString.add(UNKNOWN_ID, UNKNOWN_WORD);
       stringToId.put(UNKNOWN_WORD, UNKNOWN_ID);
+    } finally {
+      lock.unlockWrite(lock_stamp);
     }
   }
-  
+
   public static void unregisterLanguageModels() {
     LMs.clear();
   }
-  
+
 }

[4/6] incubator-joshua git commit: Removed nonTerminalIndices functionality as it is no longer in use

Posted by mj...@apache.org.

Removed nonTerminalIndices functionality as it is no longer in use


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/ef6d5686
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/ef6d5686
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/ef6d5686

Branch: refs/heads/master
Commit: ef6d5686380fc0965182bb5432adfb35eccab193
Parents: cb70014
Author: Kellen Sunderland <ke...@amazon.com>
Authored: Wed Jan 27 13:35:21 2016 +0100
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Mon May 2 12:29:06 2016 -0700

----------------------------------------------------------------------
 src/joshua/corpus/Vocabulary.java     |  40 ----------
 tst/joshua/corpus/VocabularyTest.java | 118 +++++++++++++++++++++++++++++
 2 files changed, 118 insertions(+), 40 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef6d5686/src/joshua/corpus/Vocabulary.java
----------------------------------------------------------------------
diff --git a/src/joshua/corpus/Vocabulary.java b/src/joshua/corpus/Vocabulary.java
index ee59507..12d184d 100644
--- a/src/joshua/corpus/Vocabulary.java
+++ b/src/joshua/corpus/Vocabulary.java
@@ -1,7 +1,5 @@
 package joshua.corpus;
 
-import static joshua.util.FormatUtils.isNonterminal;
-
 import java.io.BufferedInputStream;
 import java.io.BufferedOutputStream;
 import java.io.DataInputStream;
@@ -34,7 +32,6 @@ public class Vocabulary {
   private static List<String> idToString;
   private static Map<String, Integer> stringToId;
   private static final StampedLock lock = new StampedLock();
-  private static volatile List<Integer> nonTerminalIndices;
 
   static final int UNKNOWN_ID = 0;
   static final String UNKNOWN_WORD = "<unk>";
@@ -123,23 +120,12 @@ public class Vocabulary {
         return resultId;
       }
     }
-else {
-        if (nonTerminalIndices != null && nt(token)) {
-          throw new IllegalArgumentException(
-              "After the nonterminal indices have been set by calling getNonterminalIndices you can't call id on new nonterminals anymore.");
-        }
-      }
 
     // The optimistic read failed, try a read with a stamped read lock
     long read_lock_stamp = lock.readLock();
     try {
       if (stringToId.containsKey(token)) {
         return stringToId.get(token);
-      } else {
-        if (nonTerminalIndices != null && nt(token)) {
-          throw new IllegalArgumentException(
-              "After the nonterminal indices have been set by calling getNonterminalIndices you can't call id on new nonterminals anymore.");
-        }
       }
     } finally {
       lock.unlockRead(read_lock_stamp);
@@ -215,32 +201,6 @@ else {
     return sb.deleteCharAt(sb.length() - 1).toString();
   }
 
-  /**
-   * This method returns a list of all (positive) indices
-   * corresponding to Nonterminals in the Vocabulary.
-   */
-  public static synchronized List<Integer> getNonterminalIndices()
-  {
-    if (nonTerminalIndices == null) {
-      nonTerminalIndices = findNonTerminalIndices();
-    }
-    return nonTerminalIndices;
-  }
-
-  /**
-   * Iterates over the Vocabulary and finds all non terminal indices.
-   */
-  private static List<Integer> findNonTerminalIndices() {
-    List<Integer> nonTerminalIndices = new ArrayList<>();
-    for(int i = 0; i < idToString.size(); i++) {
-      final String word = idToString.get(i);
-      if(isNonterminal(word)){
-        nonTerminalIndices.add(i);
-      }
-    }
-    return nonTerminalIndices;
-  }
-
   public static int getUnknownId() {
     return UNKNOWN_ID;
   }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef6d5686/tst/joshua/corpus/VocabularyTest.java
----------------------------------------------------------------------
diff --git a/tst/joshua/corpus/VocabularyTest.java b/tst/joshua/corpus/VocabularyTest.java
new file mode 100644
index 0000000..724d9c7
--- /dev/null
+++ b/tst/joshua/corpus/VocabularyTest.java
@@ -0,0 +1,118 @@
+// Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+package joshua.corpus;
+
+import static org.junit.Assert.*;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+public class VocabularyTest {
+  private static final String WORD1 = "word1";
+  private static final String WORD2 = "word2";
+  private static final String NON_TERMINAL = "[X]";
+  private static final String GOAL = "[GOAL]";
+
+  @Before
+  public void init() {
+    Vocabulary.clear();
+  }
+  
+  @After
+  public void deinit() {
+    Vocabulary.clear();
+  }
+  
+  @Test
+  public void givenVocabulary_whenEmpty_thenOnlyContainsUnknownWord() {
+    assertTrue(Vocabulary.hasId(Vocabulary.UNKNOWN_ID));
+    assertFalse(Vocabulary.hasId(1));
+    assertFalse(Vocabulary.hasId(-1));
+    assertEquals(Vocabulary.UNKNOWN_WORD, Vocabulary.word(Vocabulary.UNKNOWN_ID));
+    assertEquals(1, Vocabulary.size());
+  }
+  
+  @Test
+  public void givenVocabulary_whenNewWord_thenMappingIsAdded() {
+    final int FIRST_WORD_ID = 1;
+    assertFalse(Vocabulary.hasId(FIRST_WORD_ID));
+    assertEquals(FIRST_WORD_ID, Vocabulary.id(WORD1));
+    //should return same id after second call:
+    assertEquals(FIRST_WORD_ID, Vocabulary.id(WORD1));
+    assertTrue(Vocabulary.hasId(FIRST_WORD_ID));
+    assertEquals(WORD1, Vocabulary.word(FIRST_WORD_ID));
+    assertEquals(2, Vocabulary.size());
+  }
+  
+  @Test
+  public void givenVocabulary_whenCheckingStringInBracketsOrNegativeNumber_thenIsNonTerminal() {
+    //non-terminals
+    assertTrue(Vocabulary.nt(NON_TERMINAL));
+    //terminals
+    assertFalse(Vocabulary.nt(WORD1));
+    assertFalse(Vocabulary.nt("[]"));
+    assertFalse(Vocabulary.nt("["));
+    assertFalse(Vocabulary.nt("]"));
+    assertFalse(Vocabulary.nt(""));
+    
+    //negative numbers indicate non-terminals
+    assertTrue(Vocabulary.nt(-1));
+    assertTrue(Vocabulary.nt(-5));
+    
+    //positive numbers indicate terminals:
+    assertFalse(Vocabulary.nt(0));
+    assertFalse(Vocabulary.nt(5));
+
+    
+  }
+  
+  @Test
+  public void givenVocabulary_whenNonTerminal_thenReturnsStrictlyPositiveNonTerminalIndices() {
+    final int FIRST_NON_TERMINAL_INDEX = 1;
+    assertTrue(Vocabulary.id(NON_TERMINAL) < 0);
+    assertTrue(Vocabulary.hasId(FIRST_NON_TERMINAL_INDEX));
+    assertTrue(Vocabulary.hasId(-FIRST_NON_TERMINAL_INDEX));
+    
+    assertTrue(Vocabulary.id("") > 0);
+    assertTrue(Vocabulary.id(WORD1) > 0);
+    
+    final int SECOND_NON_TERMINAL_INDEX = 4;
+    assertTrue(Vocabulary.id(GOAL) < 0);
+    assertTrue(Vocabulary.hasId(SECOND_NON_TERMINAL_INDEX));
+    assertTrue(Vocabulary.hasId(-SECOND_NON_TERMINAL_INDEX));
+    
+    assertTrue(Vocabulary.id(WORD2) > 0);
+  }
+  
+  @Rule
+  public TemporaryFolder folder = new TemporaryFolder();
+  
+  @Test
+  public void givenVocabulary_whenWritenAndReading_thenVocabularyStaysTheSame() throws IOException {
+    File vocabFile = folder.newFile();
+    
+    int id1 = Vocabulary.id(WORD1);
+    int id2 = Vocabulary.id(NON_TERMINAL);
+    int id3 = Vocabulary.id(WORD2);
+    
+    Vocabulary.write(vocabFile.getAbsolutePath());
+    
+    Vocabulary.clear();
+    
+    Vocabulary.read(vocabFile);
+    
+    assertEquals(4, Vocabulary.size()); //unknown word + 3 other words
+    assertTrue(Vocabulary.hasId(id1));
+    assertTrue(Vocabulary.hasId(id2));
+    assertTrue(Vocabulary.hasId(id3));
+    assertEquals(id1, Vocabulary.id(WORD1));
+    assertEquals(id2, Vocabulary.id(NON_TERMINAL));
+    assertEquals(id3, Vocabulary.id(WORD2));
+  }
+}

[5/6] incubator-joshua git commit: Update Vocabulary.java

Posted by mj...@apache.org.

Update Vocabulary.java

Restore the Apache license header (lost in a cherry-pick)


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/fb5f720c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/fb5f720c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/fb5f720c

Branch: refs/heads/master
Commit: fb5f720cde3c8b937e2473017689667b07ff4f19
Parents: ef6d568
Author: Kellen Sunderland <ke...@gmail.com>
Authored: Mon May 2 14:54:41 2016 -0700
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Mon May 2 14:59:32 2016 -0700

----------------------------------------------------------------------
 src/joshua/corpus/Vocabulary.java | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/fb5f720c/src/joshua/corpus/Vocabulary.java
----------------------------------------------------------------------
diff --git a/src/joshua/corpus/Vocabulary.java b/src/joshua/corpus/Vocabulary.java
index 12d184d..d79170d 100644
--- a/src/joshua/corpus/Vocabulary.java
+++ b/src/joshua/corpus/Vocabulary.java
@@ -1,3 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package joshua.corpus;
 
 import java.io.BufferedInputStream;

[2/6] incubator-joshua git commit: Optimized allocations with sub-array indexes

Posted by mj...@apache.org.

Optimized allocations with sub-array indexes

------------
last 10 minutes of recordings taken
Statistics:
Before
Total TLAB: 1,391.77 GB
Allocation rate: 2.32 GB/s

After
Total TLAB: 1,320.95 GB
Allocation rate: 2.20 GB/s
------------
Results
-0.12 GB/s allocation rate
java.util.Arrays.copyOfRange: -70 GB allocations from joshua.decoder.ff.lm.berkeley_lm.LMGrammarBerkeley.sentenceLogProbability

10K translation performance testing
Before: avg 5.29
After: avg 5.24


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/39f59a8d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/39f59a8d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/39f59a8d

Branch: refs/heads/master
Commit: 39f59a8d7950f362cc52b2414dbd53efc130e404
Parents: 7fd3cfc
Author: Pavel Danchenko <da...@amazon.com>
Authored: Wed Feb 10 17:12:15 2016 +0100
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Mon May 2 12:23:39 2016 -0700

----------------------------------------------------------------------
 .../ff/lm/berkeley_lm/LMGrammarBerkeley.java    | 49 +++++++++++++-------
 .../LMBerkeleySentenceProbablityTest.java       | 29 ++++++++++++
 2 files changed, 62 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/39f59a8d/src/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeley.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeley.java b/src/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeley.java
index d5728cf..2716576 100644
--- a/src/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeley.java
+++ b/src/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeley.java
@@ -24,6 +24,8 @@ import java.util.logging.Handler;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
+import com.google.common.annotations.VisibleForTesting;
+
 import joshua.corpus.Vocabulary;
 import joshua.decoder.ff.lm.DefaultNGramLanguageModel;
 import joshua.decoder.Decoder;
@@ -37,7 +39,7 @@ import edu.berkeley.nlp.lm.util.StrUtils;
 
 /**
  * This class wraps Berkeley LM.
- * 
+ *
  * @author adpauls@gmail.com
  */
 public class LMGrammarBerkeley extends DefaultNGramLanguageModel {
@@ -120,9 +122,9 @@ public class LMGrammarBerkeley extends DefaultNGramLanguageModel {
     for (int j = startIndex; j < order && j <= sentenceLength; j++) {
       // TODO: startIndex dependens on the order, e.g., this.ngramOrder-1 (in srilm, for 3-gram lm,
       // start_index=2. othercase, need to check)
-      int[] ngram = Arrays.copyOfRange(sentence, 0, j);
-      double logProb = ngramLogProbability_helper(ngram, false);
+      double logProb = ngramLogProbability_helper(sentence, 0, j, false);
       if (logger.isLoggable(Level.FINE)) {
+        int[] ngram = Arrays.copyOfRange(sentence, 0, j);
         String words = Vocabulary.getWords(ngram);
         logger.fine("\tlogp ( " + words + " )  =  " + logProb);
       }
@@ -131,9 +133,9 @@ public class LMGrammarBerkeley extends DefaultNGramLanguageModel {
 
     // regular-order ngrams
     for (int i = 0; i <= sentenceLength - order; i++) {
-      int[] ngram = Arrays.copyOfRange(sentence, i, i + order);
-      double logProb =  ngramLogProbability_helper(ngram, false);
+      double logProb =  ngramLogProbability_helper(sentence, i, order, false);
       if (logger.isLoggable(Level.FINE)) {
+        int[] ngram = Arrays.copyOfRange(sentence, i, i + order);
         String words = Vocabulary.getWords(ngram);
         logger.fine("\tlogp ( " + words + " )  =  " + logProb);
       }
@@ -147,26 +149,26 @@ public class LMGrammarBerkeley extends DefaultNGramLanguageModel {
   public float ngramLogProbability_helper(int[] ngram, int order) {
     return ngramLogProbability_helper(ngram, false);
   }
-  
+
   protected float ngramLogProbability_helper(int[] ngram, boolean log) {
+    return ngramLogProbability_helper(ngram, 0, ngram.length, log); // delegate: the whole array is the n-gram
+  }
 
+  protected float ngramLogProbability_helper(int sentence[], int ngramStartPos, int ngramLength, boolean log) {
     int[] mappedNgram = arrayScratch.get();
-    if (mappedNgram.length < ngram.length) {
-      arrayScratch.set(mappedNgram = new int[mappedNgram.length * 2]);
+    if (mappedNgram.length < ngramLength) { // per-caller scratch buffer is too small for this n-gram
+      mappedNgram = new int[Math.max(mappedNgram.length * 2, ngramLength)]; // a single doubling can still be shorter than ngramLength
+      arrayScratch.set(mappedNgram);
     }
-    for (int i = 0; i < ngram.length; ++i) {
-      mappedNgram[i] = vocabIdToMyIdMapping[ngram[i]];
+    for (int i = 0; i < ngramLength; ++i) { // map ids of sentence[ngramStartPos .. ngramStartPos + ngramLength) into the scratch buffer
+      mappedNgram[i] = vocabIdToMyIdMapping[sentence[ngramStartPos + i]];
     }
 
     if (log && logRequests) {
-      final int[] copyOf = Arrays.copyOf(mappedNgram, ngram.length);
-      for (int i = 0; i < copyOf.length; ++i)
-        if (copyOf[i] < 0) copyOf[i] = unkIndex;
-      logger.finest(StrUtils.join(WordIndexer.StaticMethods.toList(lm.getWordIndexer(), copyOf)));
+      dumpBuffer(mappedNgram, ngramLength); // only the first ngramLength entries of the scratch buffer are valid
     }
-    final float res = lm.getLogProb(mappedNgram, 0, ngram.length);
 
-    return res;
+    return lm.getLogProb(mappedNgram, 0, ngramLength);
   }
 
   public static void setLogRequests(Handler handler) {
@@ -183,4 +185,19 @@ public class LMGrammarBerkeley extends DefaultNGramLanguageModel {
   public float ngramLogProbability(int[] ngram, int order) {
     return ngramLogProbability(ngram);
   }
+
+  private void dumpBuffer(int[] buffer, int len) { // logs the first len entries of buffer at FINEST level
+    final int[] copyOf = Arrays.copyOf(buffer, len); // work on a copy so the caller's buffer is left unmodified
+    for (int i = 0; i < copyOf.length; ++i) {
+      if (copyOf[i] < 0) { // negative ids are replaced by unkIndex before being converted for output
+        copyOf[i] = unkIndex;
+      }
+    }
+    logger.finest(StrUtils.join(WordIndexer.StaticMethods.toList(lm.getWordIndexer(), copyOf)));
+  }
+
+  @VisibleForTesting
+  ArrayEncodedNgramLanguageModel<String> getLM() { // package-private accessor returning the wrapped lm field
+    return lm;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/39f59a8d/tst/joshua/decoder/ff/lm/berkeley_lm/LMBerkeleySentenceProbablityTest.java
----------------------------------------------------------------------
diff --git a/tst/joshua/decoder/ff/lm/berkeley_lm/LMBerkeleySentenceProbablityTest.java b/tst/joshua/decoder/ff/lm/berkeley_lm/LMBerkeleySentenceProbablityTest.java
new file mode 100644
index 0000000..74a832e
--- /dev/null
+++ b/tst/joshua/decoder/ff/lm/berkeley_lm/LMBerkeleySentenceProbablityTest.java
@@ -0,0 +1,29 @@
+package joshua.decoder.ff.lm.berkeley_lm;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Test;
+
+import edu.berkeley.nlp.lm.ArrayEncodedNgramLanguageModel;
+
+public class LMBerkeleySentenceProbablityTest { // compares sentenceLogProbability with direct queries to the underlying LM
+
+  @Test
+  public void verifySentenceLogProbability() {
+    LMGrammarBerkeley grammar = new LMGrammarBerkeley(2, "resources/berkeley_lm/lm"); // NOTE(review): first argument is presumably the n-gram order — confirm
+    grammar.registerWord("the", 2);
+    grammar.registerWord("chat-rooms", 3);
+    grammar.registerWord("<unk>", 0);
+
+    ArrayEncodedNgramLanguageModel<String> lm = grammar.getLM();
+    float expected = // sum of direct LM scores for the prefixes and bigrams of {0, 2, 3, 0}
+        lm.getLogProb(new int[] {}, 0, 0)
+        + lm.getLogProb(new int[] {0}, 0, 1)
+        + lm.getLogProb(new int[] {0, 2}, 0, 2)
+        + lm.getLogProb(new int[] {2, 3}, 0, 2)
+        + lm.getLogProb(new int[] {3, 0}, 0, 2);
+
+    float result = grammar.sentenceLogProbability(new int[] {0, 2, 3, 0}, 2, 0);
+    assertEquals(expected, result, 0.0); // delta 0.0 demands exact float equality
+  }
+}