You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/28 12:45:46 UTC
[2/6] incubator-joshua git commit: Converted estimateRule to also make use of DirectBuffer. Reduced number of array copies in probRule. Removed sentence from estimate method signature (as it was unused). Created an abstraction in the KenLMPool class to hide details of underlying ByteBuffer Indexing. Fixed Test givenKenLm_whenQueryingWithState_thenStateAndProbReturned

Converted estimateRule to also make use of DirectBuffer.
Reduced number of array copies in probRule.
Removed the unused sentence parameter from the estimateCost method signature.
Created an abstraction in the KenLMPool class to hide the details of the underlying ByteBuffer indexing.
Fixed the test givenKenLm_whenQueryingWithState_thenStateAndProbReturned.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/c8d8a65b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/c8d8a65b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/c8d8a65b

Branch: refs/heads/master
Commit: c8d8a65b9352e51e777965994dae7f9337b08def
Parents: 9ea7eeb
Author: Kellen Sunderland <ke...@amazon.com>
Authored: Thu Sep 15 19:31:21 2016 +0200
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Tue Sep 20 13:26:27 2016 +0200

----------------------------------------------------------------------
 jni/kenlm_wrap.cc                               | 19 ++++----
 .../org/apache/joshua/decoder/KenLMPool.java    | 26 ++++++++--
 .../joshua/decoder/ff/FeatureFunction.java      |  2 +-
 .../apache/joshua/decoder/ff/OOVPenalty.java    |  2 +-
 .../apache/joshua/decoder/ff/PhraseModel.java   |  2 +-
 .../apache/joshua/decoder/ff/PhrasePenalty.java |  2 +-
 .../apache/joshua/decoder/ff/StatelessFF.java   |  2 +-
 .../apache/joshua/decoder/ff/TargetBigram.java  |  2 +-
 .../apache/joshua/decoder/ff/WordPenalty.java   |  2 +-
 .../decoder/ff/fragmentlm/FragmentLMFF.java     |  2 +-
 .../org/apache/joshua/decoder/ff/lm/KenLM.java  | 25 +++-------
 .../joshua/decoder/ff/lm/LanguageModelFF.java   |  2 +-
 .../ff/lm/StateMinimizingLanguageModel.java     | 51 +++++++++++---------
 .../ff/similarity/EdgePhraseSimilarityFF.java   |  2 +-
 .../org/apache/joshua/decoder/ff/tm/Rule.java   |  2 +-
 .../java/org/apache/joshua/util/Constants.java  |  1 +
 .../org/apache/joshua/system/KenLmTest.java     | 16 +++---
 17 files changed, 90 insertions(+), 70 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/jni/kenlm_wrap.cc
----------------------------------------------------------------------
diff --git a/jni/kenlm_wrap.cc b/jni/kenlm_wrap.cc
index 0f3148a..8947a61 100644
--- a/jni/kenlm_wrap.cc
+++ b/jni/kenlm_wrap.cc
@@ -148,7 +148,7 @@ public:
   virtual float ProbString(jint * const begin, jint * const end,
       jint start) const = 0;
 
-  virtual float EstimateRule(jlong *begin, jlong *end) const = 0;
+  virtual float EstimateRule(const Chart &chart) const = 0;
 
   virtual uint8_t Order() const = 0;
 
@@ -229,7 +229,12 @@ public:
     return ruleScore.Finish();
   }
 
-  float EstimateRule(jlong * const begin, jlong * const end) const {
+  float EstimateRule(const Chart &chart) const {
+
+    // By convention the first long in the ngramBuffer denotes the size of the buffer
+    long* begin = chart.ngramBuffer_ + 1;
+    long* end = begin + *chart.ngramBuffer_;
+
     if (begin == end) return 0.0;
     lm::ngram::ChartState nullState;
     lm::ngram::RuleScore<Model> ruleScore(m_, nullState);
@@ -472,15 +477,11 @@ JNIEXPORT jlong JNICALL Java_org_apache_joshua_decoder_ff_lm_KenLM_probRule(
 }
 
 JNIEXPORT jfloat JNICALL Java_org_apache_joshua_decoder_ff_lm_KenLM_estimateRule(
-  JNIEnv *env, jclass, jlong pointer, jlongArray arr) {
-  jint length = env->GetArrayLength(arr);
-  // GCC only.
-  jlong values[length];
-  env->GetLongArrayRegion(arr, 0, length, values);
+  JNIEnv *env, jclass, jlong pointer, jlong chartPtr) {
 
   // Compute the probability
-  return reinterpret_cast<const VirtualBase*>(pointer)->EstimateRule(values,
-      values + length);
+  Chart* chart = reinterpret_cast<Chart*>(chartPtr);
+  return reinterpret_cast<const VirtualBase*>(pointer)->EstimateRule(*chart);
 }
 
 } // extern

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/KenLMPool.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/KenLMPool.java b/src/main/java/org/apache/joshua/decoder/KenLMPool.java
index a1e709b..48aa912 100644
--- a/src/main/java/org/apache/joshua/decoder/KenLMPool.java
+++ b/src/main/java/org/apache/joshua/decoder/KenLMPool.java
@@ -4,6 +4,8 @@ import org.apache.joshua.decoder.ff.lm.KenLM;
 
 import java.nio.ByteBuffer;
 
+import static org.apache.joshua.util.Constants.LONG_SIZE_IN_BYTES;
+
 /**
  * Class to wrap a KenLM pool of states.  This class is not ThreadSafe.  It should be
  * used in a scoped context, and close must be called to release native resources.  It
@@ -37,14 +39,32 @@ public class KenLMPool implements AutoCloseable {
   }
 
   @Override
-  public void close() {
+  public synchronized void close() {
     if (!released) {
       released = true;
       languageModel.destroyLMPool(pool);
     }
   }
 
-  public ByteBuffer getNgramBuffer() {
-    return ngramBuffer;
+  /**
+   * Write a single id to the KenLM shared buffer.
+   * Note: This method must be used in conjunction with setBufferLength.
+   *
+   * @param index index at which to write id.
+   * @param id    word id to write.
+   */
+  public void writeIdToBuffer(int index, long id) {
+    this.ngramBuffer.putLong((index + 1) * LONG_SIZE_IN_BYTES, id);
+  }
+
+  /**
+   * Manually set the length of the ngram array to be used when calling probRule or estimate on
+   * KenLM.
+   * Note: Must be used if you are calling writeIdToBuffer.
+   *
+   * @param length The size of the array of ngrams you would like to use with probRule or estimate.
+   */
+  public void setBufferLength(long length) {
+    ngramBuffer.putLong(0, length);
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java b/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
index 8b17326..340af0e 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
@@ -253,7 +253,7 @@ public abstract class FeatureFunction {
    * @param sentence {@link org.apache.joshua.lattice.Lattice} input
    * @return the *weighted* cost of applying the feature.
    */
-  public abstract float estimateCost(Rule rule, Sentence sentence);
+  public abstract float estimateCost(Rule rule);
 
   /**
    * This feature is called to produce a *weighted estimate* of the future cost

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java b/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
index 6f37fa4..3d851c5 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
@@ -94,7 +94,7 @@ public class OOVPenalty extends StatelessFF {
    * Important! estimateCost returns the *weighted* feature value.
    */
   @Override
-  public float estimateCost(Rule rule, Sentence sentence) {
+  public float estimateCost(Rule rule) {
     if (rule != null && this.ownerID.equals(rule.getOwner()))
       return weights.getDense(denseFeatureIndex) * getValue(rule.getLHS());
     return 0.0f;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java b/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
index bd490dc..e11051a 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
@@ -85,7 +85,7 @@ public class PhraseModel extends StatelessFF {
    * functions.
    */
   @Override
-  public float estimateCost(final Rule rule, Sentence sentence) {
+  public float estimateCost(final Rule rule) {
 
     if (rule != null && rule.getOwner().equals(ownerID)) {
       if (rule.getPrecomputableCost() <= Float.NEGATIVE_INFINITY)

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java b/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
index 2643729..0cfd28f 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
@@ -78,7 +78,7 @@ public class PhrasePenalty extends StatelessFF {
    * 
    */
   @Override
-  public float estimateCost(Rule rule, Sentence sentence) {
+  public float estimateCost(Rule rule) {
     if (rule != null && rule != Hypothesis.BEGIN_RULE && rule != Hypothesis.END_RULE 
         && (rule.getOwner().equals(owner)))
       return weights.getDense(denseFeatureIndex) * value;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java b/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java
index e473c37..86a36c2 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java
@@ -49,7 +49,7 @@ public abstract class StatelessFF extends FeatureFunction {
    * The estimated cost of applying this feature, given only the rule. This is used in sorting the
    * rules for cube pruning. For most features, this will be 0.0.
    */
-  public float estimateCost(Rule rule, Sentence sentence) {
+  public float estimateCost(Rule rule) {
     return 0.0f;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java b/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
index a9264e0..cb5c94d 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
@@ -191,7 +191,7 @@ public class TargetBigram extends StatefulFF {
    * TargetBigram features are only computed across hyperedges, so there is nothing to be done here.
    */
   @Override
-  public float estimateCost(Rule rule, Sentence sentence) {
+  public float estimateCost(Rule rule) {
     return 0.0f;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java b/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java
index da315ec..bd51be3 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java
@@ -72,7 +72,7 @@ public final class WordPenalty extends StatelessFF {
   }
 
   @Override
-  public float estimateCost(Rule rule, Sentence sentence) {
+  public float estimateCost(Rule rule) {
     if (rule != null)
       return weights.getDense(denseFeatureIndex) * OMEGA * (rule.getEnglish().length - rule.getArity());
     return 0.0f;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
index 7388262..944f5f2 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
@@ -278,7 +278,7 @@ public class FragmentLMFF extends StatefulFF {
   }
 
   @Override
-  public float estimateCost(Rule rule, Sentence sentence) {
+  public float estimateCost(Rule rule) {
     // TODO Auto-generated method stub
     return 0;
   }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java b/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
index e8a9f0f..df16019 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
@@ -21,6 +21,7 @@ package org.apache.joshua.decoder.ff.lm;
 import org.apache.joshua.corpus.Vocabulary;
 import org.apache.joshua.decoder.KenLMPool;
 import org.apache.joshua.decoder.ff.state_maintenance.KenLMState;
+import org.apache.joshua.util.Constants;
 import org.apache.joshua.util.FormatUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -39,8 +40,6 @@ import java.nio.ByteBuffer;
 
 public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
 
-  private static final int LONG_SIZE_IN_BYTES = Long.SIZE / 8;
-
   private static final Logger LOG = LoggerFactory.getLogger(KenLM.class);
 
   // Maximum number of terminal and non-terminal symbols on a rule's target side
@@ -89,7 +88,7 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
 
   private static native long probRule(long ptr, long pool);
 
-  private static native float estimateRule(long ptr, long words[]);
+  private static native float estimateRule(long ptr, long poolWrapper);
 
   private static native float probString(long ptr, int words[], int start);
 
@@ -108,7 +107,8 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
   }
 
   public KenLMPool createLMPool() {
-    ByteBuffer ngramBuffer = ByteBuffer.allocateDirect(MAX_TARGET_LENGTH * LONG_SIZE_IN_BYTES);
+    ByteBuffer ngramBuffer = ByteBuffer.allocateDirect(MAX_TARGET_LENGTH *
+            Constants.LONG_SIZE_IN_BYTES);
     ngramBuffer.order(java.nio.ByteOrder.LITTLE_ENDIAN);
     long pool = createPool(ngramBuffer);
     return new KenLMPool(pool, this, ngramBuffer);
@@ -158,18 +158,11 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
    * needed so KenLM knows which memory pool to use. When finished, it returns the updated KenLM
    * state and the LM probability incurred along this rule.
    *
-   * @param words       array of words
    * @param poolWrapper an object that wraps a pool reference returned from KenLM createPool
    * @return the updated {@link org.apache.joshua.decoder.ff.lm.KenLM.StateProbPair} e.g.
    * KenLM state and the LM probability incurred along this rule
    */
-  public StateProbPair probRule(long[] words, KenLMPool poolWrapper) {
-
-    poolWrapper.getNgramBuffer().putLong(0, words.length);
-    for (int i = 0; i < words.length; i++) {
-      poolWrapper.getNgramBuffer().putLong((i + 1) * LONG_SIZE_IN_BYTES, words[i]);
-    }
-
+  public StateProbPair probRule(KenLMPool poolWrapper) {
     long packedResult = probRule(pointer, poolWrapper.getPool());
     int state = (int) (packedResult >> 32);
     float probVal = Float.intBitsToFloat((int) packedResult);
@@ -181,13 +174,12 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
    * Public facing function that estimates the cost of a rule, which value is used for sorting
    * rules during cube pruning.
    *
-   * @param words array of words
    * @return the estimated cost of the rule (the (partial) n-gram probabilities of all words in the rule)
    */
-  public float estimateRule(long[] words) {
+  public float estimateRule(KenLMPool poolWrapper) {
     float estimate = 0.0f;
     try {
-      estimate = estimateRule(pointer, words);
+      estimate = estimateRule(pointer, poolWrapper.getPool());
     } catch (NoSuchMethodError e) {
       throw new RuntimeException(e);
     }
@@ -257,8 +249,7 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
   }
 
   /**
-   * Inner class used to hold the results returned from KenLM with left-state minimization. Note
-   * that inner classes have to be static to be accessible from the JNI!
+   * Inner class used to hold the results returned from KenLM with left-state minimization.
    */
   public static class StateProbPair {
     public KenLMState state = null;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java b/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
index f5c1cb5..a29c754 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
@@ -325,7 +325,7 @@ public class LanguageModelFF extends StatefulFF {
    * n-grams on the left-hand side.
    */
   @Override
-  public float estimateCost(Rule rule, Sentence sentence) {
+  public float estimateCost(Rule rule) {
 
     float lmEstimate = 0.0f;
     boolean considerIncompleteNgrams = true;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java b/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
index 2219ce8..155522b 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
@@ -72,17 +72,19 @@ public class StateMinimizingLanguageModel extends LanguageModelFF {
    * efficiently than the default {@link LanguageModelFF} class.
    */
   @Override
-  public float estimateCost(Rule rule, Sentence sentence) {
+  public float estimateCost(Rule rule) {
 
     int[] ruleWords = getRuleIds(rule);
 
-    // map to ken lm ids
-    final long[] words = mapToKenLmIds(ruleWords, null, true);
-    
-    // Get the probability of applying the rule and the new state
-    float lmCost = weight * ((KenLM) languageModel).estimateRule(words);
-    float oovCost = oovWeight * ((withOovFeature) ? getOovs(ruleWords) : 0f);
-    return lmCost + oovCost;
+    try(KenLMPool poolWrapper = ((KenLM)languageModel).createLMPool();) {
+      // Write KenLM word ids to a shared ByteBuffer.
+      writeKenLmIds(ruleWords, null, poolWrapper);
+
+      // Get the probability of applying the rule and the new state
+      float lmCost = weight * ((KenLM)languageModel).estimateRule(poolWrapper);
+      float oovCost = oovWeight * ((withOovFeature) ? getOovs(ruleWords) : 0f);
+      return lmCost + oovCost;
+    }
   }
 
   private UUID languageModelPoolId = UUID.randomUUID();
@@ -101,7 +103,7 @@ public class StateMinimizingLanguageModel extends LanguageModelFF {
 
     int[] ruleWords;
     if (config.source_annotations) {
-      // get source side annotations and project them to the target side
+      // Get source side annotations and project them to the target side
       ruleWords = getTags(rule, i, j, sentence);
     } else {
       ruleWords = getRuleIds(rule);
@@ -112,14 +114,16 @@ public class StateMinimizingLanguageModel extends LanguageModelFF {
       acc.add(oovDenseFeatureIndex, getOovs(ruleWords));
     }
 
-     // map to ken lm ids
-    final long[] words = mapToKenLmIds(ruleWords, tailNodes, false);
+    KenLMPool statePool = sentence.getStateManager().getStatePool(languageModelPoolId,
+            (KenLM)languageModel);
+
+     // Write KenLM ngram ids to the shared direct buffer
+    writeKenLmIds(ruleWords, tailNodes, statePool);
+
 
-    KenLMPool statePool = sentence.getStateManager().getStatePool(languageModelPoolId, (KenLM)
-            languageModel);
 
     // Get the probability of applying the rule and the new state
-    final StateProbPair pair = ((KenLM) languageModel).probRule(words, statePool);
+    final StateProbPair pair = ((KenLM)languageModel).probRule(statePool);
 
     // Record the prob
     acc.add(denseFeatureIndex, pair.prob);
@@ -131,31 +135,34 @@ public class StateMinimizingLanguageModel extends LanguageModelFF {
   /**
    * Maps given array of word/class ids to KenLM ids. For estimating cost and computing,
    * state retrieval differs slightly.
+   *
+   * When used for estimation tailNodes may be null.
    */
-  private long[] mapToKenLmIds(int[] ids, List<HGNode> tailNodes, boolean isOnlyEstimate) {
+  private void writeKenLmIds(int[] ids, List<HGNode> tailNodes, KenLMPool poolWrapper) {
+
+    poolWrapper.setBufferLength(ids.length);
+
     // The IDs we will to KenLM
-    long[] kenIds = new long[ids.length];
     for (int x = 0; x < ids.length; x++) {
       int id = ids[x];
 
       if (isNonterminal(id)) {
 
-        if (isOnlyEstimate) {
-          // For the estimate, we can just mark negative values
-          kenIds[x] = -1;
+        if (tailNodes == null) {
+          // For the estimation, we can just mark negative values
+          poolWrapper.writeIdToBuffer(x, -1);
         } else {
           // Nonterminal: retrieve the KenLM long that records the state
           int index = -(id + 1);
           final KenLMState state = (KenLMState) tailNodes.get(index).getDPState(stateIndex);
-          kenIds[x] = -state.getState();
+          poolWrapper.writeIdToBuffer(x, -state.getState());
         }
 
       } else {
         // Terminal: just add it
-        kenIds[x] = id;
+        poolWrapper.writeIdToBuffer(x, id);
       }
     }
-    return kenIds;
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java b/src/main/java/org/apache/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java
index e5dcbf9..93dab1d 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java
@@ -205,7 +205,7 @@ public class EdgePhraseSimilarityFF extends StatefulFF implements SourceDependen
   }
 
   @Override
-  public float estimateCost(Rule rule, Sentence sentence) {
+  public float estimateCost(Rule rule) {
     return 0.0f;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java b/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
index 0902c5a..6b3e37b 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
@@ -377,7 +377,7 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
 
 //      LOG.debug("estimateCost({} ;; {})", getFrenchWords(), getEnglishWords());
       for (FeatureFunction ff : models) {
-        float val = ff.estimateCost(this, null);
+        float val = ff.estimateCost(this);
 //        LOG.debug("  FEATURE {} -> {}", ff.getName(), val);
         this.estimatedCost += val; 
       }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/util/Constants.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/util/Constants.java b/src/main/java/org/apache/joshua/util/Constants.java
index 9612a35..b70f6f2 100644
--- a/src/main/java/org/apache/joshua/util/Constants.java
+++ b/src/main/java/org/apache/joshua/util/Constants.java
@@ -33,4 +33,5 @@ public final class Constants {
 
   public static final String fieldDelimiter = "\\s\\|{3}\\s";
   public static final String spaceSeparator = "\\s+";
+  public static final int LONG_SIZE_IN_BYTES = Long.SIZE / 8;
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/test/java/org/apache/joshua/system/KenLmTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/system/KenLmTest.java b/src/test/java/org/apache/joshua/system/KenLmTest.java
index 9f26f8f..38d6fcc 100644
--- a/src/test/java/org/apache/joshua/system/KenLmTest.java
+++ b/src/test/java/org/apache/joshua/system/KenLmTest.java
@@ -90,23 +90,23 @@ public class KenLmTest {
     registerLanguageModel(kenLm);
     String sentence = "Wayne Gretzky";
     String[] words = sentence.split("\\s+");
-    int[] ids = Vocabulary.addAll(sentence);
-    long[] longIds = new long[ids.length];
-
-    for (int i = 0; i < words.length; i++) {
-      longIds[i] = ids[i];
-    }
+    Vocabulary.addAll(sentence);
 
     // WHEN
     KenLM.StateProbPair result;
     try (KenLMPool poolPointer = kenLm.createLMPool()) {
-      result = kenLm.probRule(longIds, poolPointer);
+
+      poolPointer.setBufferLength(words.length);
+      for(int i =0; i< words.length; i++) {
+        poolPointer.writeIdToBuffer(i, Vocabulary.id(words[i]));
+      }
+      result = kenLm.probRule(poolPointer);
     }
 
     // THEN
     assertThat(result, is(notNullValue()));
     assertThat(result.state.getState(), is(1L));
-    assertThat(result.prob, is(-3.7906885f));
+    assertThat(result.prob, is(-3.7906885F));
   }
 
   @Test