You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/28 12:45:45 UTC

[1/6] incubator-joshua git commit: Convert to a DirectBuffer to transfer ngrams during probRule

Repository: incubator-joshua
Updated Branches:
  refs/heads/master 402b08a87 -> aecc0b088


Convert to a DirectBuffer to transfer ngrams during probRule


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/9ea7eebf
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/9ea7eebf
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/9ea7eebf

Branch: refs/heads/master
Commit: 9ea7eebf0164d1676f633b441bd952eaa20b0760
Parents: 9c6ae40
Author: Kellen Sunderland <ke...@amazon.com>
Authored: Thu Sep 15 19:06:04 2016 +0200
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Thu Sep 15 19:35:46 2016 +0200

----------------------------------------------------------------------
 jni/kenlm_wrap.cc                               |  30 +++---
 .../org/apache/joshua/decoder/KenLMPool.java    |  10 +-
 .../org/apache/joshua/decoder/ff/lm/KenLM.java  | 108 +++++++++++--------
 3 files changed, 89 insertions(+), 59 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9ea7eebf/jni/kenlm_wrap.cc
----------------------------------------------------------------------
diff --git a/jni/kenlm_wrap.cc b/jni/kenlm_wrap.cc
index bd82fe4..0f3148a 100644
--- a/jni/kenlm_wrap.cc
+++ b/jni/kenlm_wrap.cc
@@ -84,7 +84,9 @@ typedef std::unordered_set<StateIndex, HashIndex, EqualIndex> Lookup;
  */
 class Chart {
   public:
-    Chart() : lookup_(1000, HashIndex(vec_), EqualIndex(vec_)) {}
+    Chart(long* ngramBuffer) : 
+    ngramBuffer_(ngramBuffer),
+    lookup_(1000, HashIndex(vec_), EqualIndex(vec_)) {}
 
     StateIndex Intern(const lm::ngram::ChartState &state) {
       vec_.push_back(state);
@@ -99,6 +101,7 @@ class Chart {
     const lm::ngram::ChartState &InterpretState(StateIndex index) const {
       return vec_[index - 1];
     }
+    long* ngramBuffer_;
 
   private:
     StateVector vec_;
@@ -140,7 +143,7 @@ public:
 
   virtual bool IsKnownWordIndex(const lm::WordIndex& id) const = 0;
 
-  virtual float ProbRule(jlong *begin, jlong *end, lm::ngram::ChartState& state, const Chart &chart) const = 0;
+  virtual float ProbRule(lm::ngram::ChartState& state, const Chart &chart) const = 0;
 
   virtual float ProbString(jint * const begin, jint * const end,
       jint start) const = 0;
@@ -197,7 +200,12 @@ public:
       return id != m_.GetVocabulary().NotFound();
   }
 
-  float ProbRule(jlong * const begin, jlong * const end, lm::ngram::ChartState& state, const Chart &chart) const {
+  float ProbRule(lm::ngram::ChartState& state, const Chart &chart) const {
+
+    // By convention the first long in the ngramBuffer denots the size of the buffer
+    long* begin = chart.ngramBuffer_ + 1;
+    long* end = begin + *chart.ngramBuffer_;
+
     if (begin == end) return 0.0;
     lm::ngram::RuleScore<Model> ruleScore(m_, state);
 
@@ -351,8 +359,10 @@ JNIEXPORT void JNICALL Java_org_apache_joshua_decoder_ff_lm_KenLM_destroy(
 }
 
 JNIEXPORT jlong JNICALL Java_org_apache_joshua_decoder_ff_lm_KenLM_createPool(
-    JNIEnv *env, jclass) {
-  return reinterpret_cast<long>(new Chart());
+    JNIEnv *env, jclass, jobject arr) {
+  jlong* ngramBuffer = (jlong*)env->GetDirectBufferAddress(arr);
+  Chart *newChart = new Chart(ngramBuffer);
+  return reinterpret_cast<long>(newChart);
 }
 
 JNIEXPORT void JNICALL Java_org_apache_joshua_decoder_ff_lm_KenLM_destroyPool(
@@ -449,20 +459,14 @@ union FloatConverter {
 };
 
 JNIEXPORT jlong JNICALL Java_org_apache_joshua_decoder_ff_lm_KenLM_probRule(
-  JNIEnv *env, jclass, jlong pointer, jlong chartPtr, jlongArray arr) {
-
-  jint length = env->GetArrayLength(arr);
-  // GCC only.
-  jlong values[length];
-  env->GetLongArrayRegion(arr, 0, length, values);
+  JNIEnv *env, jclass, jlong pointer, jlong chartPtr) {
 
   // Compute the probability
   lm::ngram::ChartState outState;
   const VirtualBase *base = reinterpret_cast<const VirtualBase*>(pointer);
   Chart* chart = reinterpret_cast<Chart*>(chartPtr);
   FloatConverter prob;
-  prob.f = base->ProbRule(values, values + length, outState, *chart);
-
+  prob.f = base->ProbRule(outState, *chart);
   StateIndex index = chart->Intern(outState);
   return static_cast<uint64_t>(index) << 32 | static_cast<uint64_t>(prob.i);
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9ea7eebf/src/main/java/org/apache/joshua/decoder/KenLMPool.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/KenLMPool.java b/src/main/java/org/apache/joshua/decoder/KenLMPool.java
index 378ac51..a1e709b 100644
--- a/src/main/java/org/apache/joshua/decoder/KenLMPool.java
+++ b/src/main/java/org/apache/joshua/decoder/KenLMPool.java
@@ -2,6 +2,8 @@ package org.apache.joshua.decoder;
 
 import org.apache.joshua.decoder.ff.lm.KenLM;
 
+import java.nio.ByteBuffer;
+
 /**
  * Class to wrap a KenLM pool of states.  This class is not ThreadSafe.  It should be
  * used in a scoped context, and close must be called to release native resources.  It
@@ -15,11 +17,13 @@ public class KenLMPool implements AutoCloseable {
 
   private final long pool;
   private final KenLM languageModel;
+  private final ByteBuffer ngramBuffer;
   private boolean released = false;
 
-  public KenLMPool(long pool, KenLM languageModel) {
+  public KenLMPool(long pool, KenLM languageModel, ByteBuffer ngramBuffer) {
     this.pool = pool;
     this.languageModel = languageModel;
+    this.ngramBuffer = ngramBuffer;
   }
 
   public long getPool() {
@@ -39,4 +43,8 @@ public class KenLMPool implements AutoCloseable {
       languageModel.destroyLMPool(pool);
     }
   }
+
+  public ByteBuffer getNgramBuffer() {
+    return ngramBuffer;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9ea7eebf/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java b/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
index 0646f68..e8a9f0f 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
@@ -25,6 +25,8 @@ import org.apache.joshua.util.FormatUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.nio.ByteBuffer;
+
 /**
  * JNI wrapper for KenLM. This version of KenLM supports two use cases, implemented by the separate
  * feature functions KenLMFF and LanguageModelFF. KenLMFF uses the RuleScore() interface in
@@ -37,8 +39,13 @@ import org.slf4j.LoggerFactory;
 
 public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
 
+  private static final int LONG_SIZE_IN_BYTES = Long.SIZE / 8;
+
   private static final Logger LOG = LoggerFactory.getLogger(KenLM.class);
 
+  // Maximum number of terminal and non-terminal symbols on a rule's target side
+  private static final int MAX_TARGET_LENGTH = 256;
+
   private final long pointer;
 
   // this is read from the config file, used to set maximum order
@@ -46,6 +53,24 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
   // inferred from model file (may be larger than ngramOrder)
   private final int N;
 
+  public KenLM(int order, String file_name) {
+    pointer = initializeSystemLibrary(file_name);
+    ngramOrder = order;
+    N = order(pointer);
+  }
+
+  /**
+   * Constructor if order is not known.
+   * Order will be inferred from the model.
+   *
+   * @param file_name string path to an input file
+   */
+  public KenLM(String file_name) {
+    pointer = initializeSystemLibrary(file_name);
+    N = order(pointer);
+    ngramOrder = N;
+  }
+
   private static native long construct(String file_name);
 
   private static native void destroy(long ptr);
@@ -62,33 +87,16 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
 
   private static native boolean isLmOov(long ptr, int word);
 
-  private static native long probRule(long ptr, long pool, long words[]);
+  private static native long probRule(long ptr, long pool);
 
   private static native float estimateRule(long ptr, long words[]);
 
   private static native float probString(long ptr, int words[], int start);
 
-  private static native long createPool();
+  private static native long createPool(ByteBuffer wordsBuffer);
 
   private static native void destroyPool(long pointer);
 
-  public KenLM(int order, String file_name) {
-    pointer = initializeSystemLibrary(file_name);
-    ngramOrder = order;
-    N = order(pointer);
-  }
-
-  /**
-   * Constructor if order is not known.
-   * Order will be inferred from the model.
-   * @param file_name string path to an input file
-   */
-  public KenLM(String file_name) {
-    pointer = initializeSystemLibrary(file_name);
-    N = order(pointer);
-    ngramOrder = N;
-  }
-
   private long initializeSystemLibrary(String file_name) {
     try {
       System.loadLibrary("ken");
@@ -99,15 +107,11 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
     }
   }
 
-  public static class KenLMLoadException extends RuntimeException {
-
-    public KenLMLoadException(UnsatisfiedLinkError e) {
-      super(e);
-    }
-  }
-
   public KenLMPool createLMPool() {
-    return new KenLMPool(createPool(), this);
+    ByteBuffer ngramBuffer = ByteBuffer.allocateDirect(MAX_TARGET_LENGTH * LONG_SIZE_IN_BYTES);
+    ngramBuffer.order(java.nio.ByteOrder.LITTLE_ENDIAN);
+    long pool = createPool(ngramBuffer);
+    return new KenLMPool(pool, this, ngramBuffer);
   }
 
   public void destroyLMPool(long pointer) {
@@ -134,6 +138,7 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
 
   /**
    * Query for n-gram probability using strings.
+   *
    * @param words a string array of words
    * @return float value denoting probability
    */
@@ -153,15 +158,21 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
    * needed so KenLM knows which memory pool to use. When finished, it returns the updated KenLM
    * state and the LM probability incurred along this rule.
    *
-   * @param words array of words
+   * @param words       array of words
    * @param poolWrapper an object that wraps a pool reference returned from KenLM createPool
    * @return the updated {@link org.apache.joshua.decoder.ff.lm.KenLM.StateProbPair} e.g.
    * KenLM state and the LM probability incurred along this rule
    */
   public StateProbPair probRule(long[] words, KenLMPool poolWrapper) {
-    long packedResult = probRule(pointer, poolWrapper.getPool(), words);
+
+    poolWrapper.getNgramBuffer().putLong(0, words.length);
+    for (int i = 0; i < words.length; i++) {
+      poolWrapper.getNgramBuffer().putLong((i + 1) * LONG_SIZE_IN_BYTES, words[i]);
+    }
+
+    long packedResult = probRule(pointer, poolWrapper.getPool());
     int state = (int) (packedResult >> 32);
-    float probVal = Float.intBitsToFloat((int)packedResult);
+    float probVal = Float.intBitsToFloat((int) packedResult);
 
     return new StateProbPair(state, probVal);
   }
@@ -186,6 +197,7 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
 
   /**
    * The start symbol for a KenLM is the Vocabulary.START_SYM.
+   *
    * @return "&lt;s&gt;"
    */
   public String getStartSymbol() {
@@ -209,21 +221,6 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
     return isKnownWord(pointer, word);
   }
 
-
-  /**
-   * Inner class used to hold the results returned from KenLM with left-state minimization. Note
-   * that inner classes have to be static to be accessible from the JNI!
-   */
-  public static class StateProbPair {
-    public KenLMState state = null;
-    public float prob = 0.0f;
-
-    public StateProbPair(long state, float prob) {
-      this.state = new KenLMState(state);
-      this.prob = prob;
-    }
-  }
-
   @Override
   public int compareTo(KenLM other) {
     if (this == other)
@@ -252,4 +249,25 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
     return prob(ngram);
   }
 
+  public static class KenLMLoadException extends RuntimeException {
+
+    public KenLMLoadException(UnsatisfiedLinkError e) {
+      super(e);
+    }
+  }
+
+  /**
+   * Inner class used to hold the results returned from KenLM with left-state minimization. Note
+   * that inner classes have to be static to be accessible from the JNI!
+   */
+  public static class StateProbPair {
+    public KenLMState state = null;
+    public float prob = 0.0f;
+
+    public StateProbPair(long state, float prob) {
+      this.state = new KenLMState(state);
+      this.prob = prob;
+    }
+  }
+
 }

[3/6] incubator-joshua git commit: Remove unneeded modifications for estimate in KenLM

Posted by mj...@apache.org.

Remove unneeded modifications for estimate in KenLM


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/d9c3d7ec
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/d9c3d7ec
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/d9c3d7ec

Branch: refs/heads/master
Commit: d9c3d7ecf069a6a0339b911b9defb8ce31ebb1f1
Parents: c8d8a65
Author: Kellen Sunderland <ke...@amazon.com>
Authored: Tue Sep 27 17:31:37 2016 +0200
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Tue Sep 27 18:16:44 2016 +0200

----------------------------------------------------------------------
 jni/kenlm_wrap.cc                               | 30 ++++++------
 .../org/apache/joshua/decoder/ff/lm/KenLM.java  | 32 ++++++++-----
 .../ff/lm/StateMinimizingLanguageModel.java     | 49 +++++++++-----------
 .../org/apache/joshua/system/KenLmTest.java     | 16 +++----
 4 files changed, 64 insertions(+), 63 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d9c3d7ec/jni/kenlm_wrap.cc
----------------------------------------------------------------------
diff --git a/jni/kenlm_wrap.cc b/jni/kenlm_wrap.cc
index 8947a61..445b57b 100644
--- a/jni/kenlm_wrap.cc
+++ b/jni/kenlm_wrap.cc
@@ -76,11 +76,10 @@ class EqualIndex : public std::binary_function<StateIndex, StateIndex, bool> {
 typedef std::unordered_set<StateIndex, HashIndex, EqualIndex> Lookup;
 
 /**
- * A Chart bundles together a unordered_multimap that maps ChartState signatures to a single
- * object instantiated using a pool. This allows duplicate states to avoid allocating separate
- * state objects at multiple places throughout a sentence, and also allows state to be shared
- * across KenLMs for the same sentence.  Multimap is used to avoid hash collisions which can
- * return incorrect results, and cause out-of-bounds lookups when multiple KenLMs are in use.
+ * A Chart bundles together a vector holding ChartStates and an unordered_set of StateIndexes
+ * which provides a mapping between StateIndexes and the positions of ChartStates in the vector.
+ * This allows for duplicate states to avoid allocating separate state objects at multiple places
+ * throughout a sentence.
  */
 class Chart {
   public:
@@ -148,7 +147,7 @@ public:
   virtual float ProbString(jint * const begin, jint * const end,
       jint start) const = 0;
 
-  virtual float EstimateRule(const Chart &chart) const = 0;
+  virtual float EstimateRule(jlong *begin, jlong *end) const = 0;
 
   virtual uint8_t Order() const = 0;
 
@@ -202,7 +201,7 @@ public:
 
   float ProbRule(lm::ngram::ChartState& state, const Chart &chart) const {
 
-    // By convention the first long in the ngramBuffer denots the size of the buffer
+    // By convention the first long in the ngramBuffer denotes the size of the buffer
     long* begin = chart.ngramBuffer_ + 1;
     long* end = begin + *chart.ngramBuffer_;
 
@@ -229,12 +228,7 @@ public:
     return ruleScore.Finish();
   }
 
-  float EstimateRule(const Chart &chart) const {
-
-    // By convention the first long in the ngramBuffer denotes the size of the buffer
-    long* begin = chart.ngramBuffer_ + 1;
-    long* end = begin + *chart.ngramBuffer_;
-
+  float EstimateRule(jlong * const begin, jlong * const end) const {
     if (begin == end) return 0.0;
     lm::ngram::ChartState nullState;
     lm::ngram::RuleScore<Model> ruleScore(m_, nullState);
@@ -477,11 +471,15 @@ JNIEXPORT jlong JNICALL Java_org_apache_joshua_decoder_ff_lm_KenLM_probRule(
 }
 
 JNIEXPORT jfloat JNICALL Java_org_apache_joshua_decoder_ff_lm_KenLM_estimateRule(
-  JNIEnv *env, jclass, jlong pointer, jlong chartPtr) {
+  JNIEnv *env, jclass, jlong pointer, jlongArray arr) {
+  jint length = env->GetArrayLength(arr);
+  // GCC only.
+  jlong values[length];
+  env->GetLongArrayRegion(arr, 0, length, values);
 
   // Compute the probability
-  Chart* chart = reinterpret_cast<Chart*>(chartPtr);
-  return reinterpret_cast<const VirtualBase*>(pointer)->EstimateRule(*chart);
+  return reinterpret_cast<const VirtualBase*>(pointer)->EstimateRule(values,
+      values + length);
 }
 
 } // extern

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d9c3d7ec/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java b/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
index df16019..d138495 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
@@ -21,13 +21,15 @@ package org.apache.joshua.decoder.ff.lm;
 import org.apache.joshua.corpus.Vocabulary;
 import org.apache.joshua.decoder.KenLMPool;
 import org.apache.joshua.decoder.ff.state_maintenance.KenLMState;
-import org.apache.joshua.util.Constants;
 import org.apache.joshua.util.FormatUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.nio.ByteBuffer;
 
+import static java.nio.ByteOrder.LITTLE_ENDIAN;
+import static org.apache.joshua.util.Constants.LONG_SIZE_IN_BYTES;
+
 /**
  * JNI wrapper for KenLM. This version of KenLM supports two use cases, implemented by the separate
  * feature functions KenLMFF and LanguageModelFF. KenLMFF uses the RuleScore() interface in
@@ -88,7 +90,7 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
 
   private static native long probRule(long ptr, long pool);
 
-  private static native float estimateRule(long ptr, long poolWrapper);
+  private static native float estimateRule(long ptr, long words[]);
 
   private static native float probString(long ptr, int words[], int start);
 
@@ -107,9 +109,8 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
   }
 
   public KenLMPool createLMPool() {
-    ByteBuffer ngramBuffer = ByteBuffer.allocateDirect(MAX_TARGET_LENGTH *
-            Constants.LONG_SIZE_IN_BYTES);
-    ngramBuffer.order(java.nio.ByteOrder.LITTLE_ENDIAN);
+    ByteBuffer ngramBuffer = ByteBuffer.allocateDirect(MAX_TARGET_LENGTH * LONG_SIZE_IN_BYTES);
+    ngramBuffer.order(LITTLE_ENDIAN);
     long pool = createPool(ngramBuffer);
     return new KenLMPool(pool, this, ngramBuffer);
   }
@@ -158,11 +159,18 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
    * needed so KenLM knows which memory pool to use. When finished, it returns the updated KenLM
    * state and the LM probability incurred along this rule.
    *
+   * @param words       array of words
    * @param poolWrapper an object that wraps a pool reference returned from KenLM createPool
    * @return the updated {@link org.apache.joshua.decoder.ff.lm.KenLM.StateProbPair} e.g.
    * KenLM state and the LM probability incurred along this rule
    */
-  public StateProbPair probRule(KenLMPool poolWrapper) {
+  public StateProbPair probRule(long[] words, KenLMPool poolWrapper) {
+
+    poolWrapper.setBufferLength(words.length);
+    for (int i = 0; i < words.length; i++) {
+      poolWrapper.writeIdToBuffer(i, words[i]);
+    }
+
     long packedResult = probRule(pointer, poolWrapper.getPool());
     int state = (int) (packedResult >> 32);
     float probVal = Float.intBitsToFloat((int) packedResult);
@@ -174,12 +182,13 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
    * Public facing function that estimates the cost of a rule, which value is used for sorting
    * rules during cube pruning.
    *
+   * @param words array of words
    * @return the estimated cost of the rule (the (partial) n-gram probabilities of all words in the rule)
    */
-  public float estimateRule(KenLMPool poolWrapper) {
+  public float estimateRule(long[] words) {
     float estimate = 0.0f;
     try {
-      estimate = estimateRule(pointer, poolWrapper.getPool());
+      estimate = estimateRule(pointer, words);
     } catch (NoSuchMethodError e) {
       throw new RuntimeException(e);
     }
@@ -249,11 +258,12 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
   }
 
   /**
-   * Inner class used to hold the results returned from KenLM with left-state minimization.
+   * Inner class used to hold the results returned from KenLM with left-state minimization. Note
+   * that inner classes have to be static to be accessible from the JNI!
    */
   public static class StateProbPair {
-    public KenLMState state = null;
-    public float prob = 0.0f;
+    public final KenLMState state;
+    public final float prob;
 
     public StateProbPair(long state, float prob) {
       this.state = new KenLMState(state);

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d9c3d7ec/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java b/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
index 155522b..c3281d6 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
@@ -76,15 +76,13 @@ public class StateMinimizingLanguageModel extends LanguageModelFF {
 
     int[] ruleWords = getRuleIds(rule);
 
-    try(KenLMPool poolWrapper = ((KenLM)languageModel).createLMPool();) {
-      // Write KenLM word ids to a shared ByteBuffer.
-      writeKenLmIds(ruleWords, null, poolWrapper);
-
-      // Get the probability of applying the rule and the new state
-      float lmCost = weight * ((KenLM)languageModel).estimateRule(poolWrapper);
-      float oovCost = oovWeight * ((withOovFeature) ? getOovs(ruleWords) : 0f);
-      return lmCost + oovCost;
-    }
+    // map to ken lm ids
+    final long[] words = mapToKenLmIds(ruleWords, null, true);
+    
+    // Get the probability of applying the rule and the new state
+    float lmCost = weight * ((KenLM) languageModel).estimateRule(words);
+    float oovCost = oovWeight * ((withOovFeature) ? getOovs(ruleWords) : 0f);
+    return lmCost + oovCost;
   }
 
   private UUID languageModelPoolId = UUID.randomUUID();
@@ -103,7 +101,7 @@ public class StateMinimizingLanguageModel extends LanguageModelFF {
 
     int[] ruleWords;
     if (config.source_annotations) {
-      // Get source side annotations and project them to the target side
+      // get source side annotations and project them to the target side
       ruleWords = getTags(rule, i, j, sentence);
     } else {
       ruleWords = getRuleIds(rule);
@@ -114,16 +112,14 @@ public class StateMinimizingLanguageModel extends LanguageModelFF {
       acc.add(oovDenseFeatureIndex, getOovs(ruleWords));
     }
 
-    KenLMPool statePool = sentence.getStateManager().getStatePool(languageModelPoolId,
-            (KenLM)languageModel);
-
-     // Write KenLM ngram ids to the shared direct buffer
-    writeKenLmIds(ruleWords, tailNodes, statePool);
-
+     // map to ken lm ids
+    final long[] words = mapToKenLmIds(ruleWords, tailNodes, false);
 
+    KenLMPool statePool = sentence.getStateManager().getStatePool(languageModelPoolId, (KenLM)
+            languageModel);
 
     // Get the probability of applying the rule and the new state
-    final StateProbPair pair = ((KenLM)languageModel).probRule(statePool);
+    final StateProbPair pair = ((KenLM) languageModel).probRule(words, statePool);
 
     // Record the prob
     acc.add(denseFeatureIndex, pair.prob);
@@ -135,34 +131,31 @@ public class StateMinimizingLanguageModel extends LanguageModelFF {
   /**
    * Maps given array of word/class ids to KenLM ids. For estimating cost and computing,
    * state retrieval differs slightly.
-   *
-   * When used for estimation tailNodes may be null.
    */
-  private void writeKenLmIds(int[] ids, List<HGNode> tailNodes, KenLMPool poolWrapper) {
-
-    poolWrapper.setBufferLength(ids.length);
-
+  private long[] mapToKenLmIds(int[] ids, List<HGNode> tailNodes, boolean isOnlyEstimate) {
     // The IDs we will to KenLM
+    long[] kenIds = new long[ids.length];
     for (int x = 0; x < ids.length; x++) {
       int id = ids[x];
 
       if (isNonterminal(id)) {
 
-        if (tailNodes == null) {
-          // For the estimation, we can just mark negative values
-          poolWrapper.writeIdToBuffer(x, -1);
+        if (isOnlyEstimate) {
+          // For the estimate, we can just mark negative values
+          kenIds[x] = -1;
         } else {
           // Nonterminal: retrieve the KenLM long that records the state
           int index = -(id + 1);
           final KenLMState state = (KenLMState) tailNodes.get(index).getDPState(stateIndex);
-          poolWrapper.writeIdToBuffer(x, -state.getState());
+          kenIds[x] = -state.getState();
         }
 
       } else {
         // Terminal: just add it
-        poolWrapper.writeIdToBuffer(x, id);
+        kenIds[x] = id;
       }
     }
+    return kenIds;
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d9c3d7ec/src/test/java/org/apache/joshua/system/KenLmTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/system/KenLmTest.java b/src/test/java/org/apache/joshua/system/KenLmTest.java
index 38d6fcc..2d129f1 100644
--- a/src/test/java/org/apache/joshua/system/KenLmTest.java
+++ b/src/test/java/org/apache/joshua/system/KenLmTest.java
@@ -90,23 +90,23 @@ public class KenLmTest {
     registerLanguageModel(kenLm);
     String sentence = "Wayne Gretzky";
     String[] words = sentence.split("\\s+");
-    Vocabulary.addAll(sentence);
+    int[] ids = Vocabulary.addAll(sentence);
+    long[] longIds = new long[ids.length];
+
+    for (int i = 0; i < words.length; i++) {
+      longIds[i] = Vocabulary.id(words[i]);
+    }
 
     // WHEN
     KenLM.StateProbPair result;
     try (KenLMPool poolPointer = kenLm.createLMPool()) {
-
-      poolPointer.setBufferLength(words.length);
-      for(int i =0; i< words.length; i++) {
-        poolPointer.writeIdToBuffer(i, Vocabulary.id(words[i]));
-      }
-      result = kenLm.probRule(poolPointer);
+      result = kenLm.probRule(longIds, poolPointer);
     }
 
     // THEN
     assertThat(result, is(notNullValue()));
     assertThat(result.state.getState(), is(1L));
-    assertThat(result.prob, is(-3.7906885F));
+    assertThat(result.prob, is(-3.7906885f));
   }
 
   @Test

[6/6] incubator-joshua git commit: Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/incubator-joshua

Posted by mj...@apache.org.

Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/incubator-joshua


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/aecc0b08
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/aecc0b08
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/aecc0b08

Branch: refs/heads/master
Commit: aecc0b08862154556152ce48498f586bf7b5cde7
Parents: 553e219 402b08a
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Sep 28 08:45:35 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed Sep 28 08:45:35 2016 -0400

----------------------------------------------------------------------
 src/main/java/org/apache/joshua/adagrad/AdaGradCore.java | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)
----------------------------------------------------------------------

[4/6] incubator-joshua git commit: Explicitly bind KenLMs to LmPool objects

Posted by mj...@apache.org.

Explicitly bind KenLMs to LmPool objects


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/e9f4f5b1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/e9f4f5b1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/e9f4f5b1

Branch: refs/heads/master
Commit: e9f4f5b1468364a658f90c168e2b8ec69c3fa48e
Parents: d9c3d7e
Author: Kellen Sunderland <ke...@amazon.com>
Authored: Tue Sep 27 18:29:50 2016 +0200
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Tue Sep 27 18:29:50 2016 +0200

----------------------------------------------------------------------
 .../org/apache/joshua/decoder/KenLMPool.java    | 70 --------------------
 .../decoder/LanguageModelStateManager.java      |  8 +--
 .../java/org/apache/joshua/decoder/LmPool.java  | 70 ++++++++++++++++++++
 .../org/apache/joshua/decoder/ff/lm/KenLM.java  | 14 ++--
 .../ff/lm/StateMinimizingLanguageModel.java     |  4 +-
 .../joshua/decoder/segment_file/Sentence.java   |  4 --
 .../org/apache/joshua/system/KenLmTest.java     |  4 +-
 7 files changed, 88 insertions(+), 86 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e9f4f5b1/src/main/java/org/apache/joshua/decoder/KenLMPool.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/KenLMPool.java b/src/main/java/org/apache/joshua/decoder/KenLMPool.java
deleted file mode 100644
index 48aa912..0000000
--- a/src/main/java/org/apache/joshua/decoder/KenLMPool.java
+++ /dev/null
@@ -1,70 +0,0 @@
-package org.apache.joshua.decoder;
-
-import org.apache.joshua.decoder.ff.lm.KenLM;
-
-import java.nio.ByteBuffer;
-
-import static org.apache.joshua.util.Constants.LONG_SIZE_IN_BYTES;
-
-/**
- * Class to wrap a KenLM pool of states.  This class is not ThreadSafe.  It should be
- * used in a scoped context, and close must be called to release native resources.  It
- * does implement a custom finalizer that will release these resources if needed, but
- * this should not be relied on.
- *
- * @author Kellen Sunderland
- */
-
-public class KenLMPool implements AutoCloseable {
-
-  private final long pool;
-  private final KenLM languageModel;
-  private final ByteBuffer ngramBuffer;
-  private boolean released = false;
-
-  public KenLMPool(long pool, KenLM languageModel, ByteBuffer ngramBuffer) {
-    this.pool = pool;
-    this.languageModel = languageModel;
-    this.ngramBuffer = ngramBuffer;
-  }
-
-  public long getPool() {
-    return pool;
-  }
-
-  @Override
-  protected void finalize() throws Throwable {
-    close();
-    super.finalize();
-  }
-
-  @Override
-  public synchronized void close() {
-    if (!released) {
-      released = true;
-      languageModel.destroyLMPool(pool);
-    }
-  }
-
-  /**
-   * Write a single id to the KenLM shared buffer.
-   * Note: This method must be used in conjunction with setBufferLength.
-   *
-   * @param index index at which to write id.
-   * @param id    word id to write.
-   */
-  public void writeIdToBuffer(int index, long id) {
-    this.ngramBuffer.putLong((index + 1) * LONG_SIZE_IN_BYTES, id);
-  }
-
-  /**
-   * Manually set the length of the ngram array to be used when calling probRule or estimate on
-   * KenLM.
-   * Note: Must be used if you are calling writeIdToBuffer.
-   *
-   * @param length The size of the array of ngrams you would like to use with probRule or estimate.
-   */
-  public void setBufferLength(long length) {
-    ngramBuffer.putLong(0, length);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e9f4f5b1/src/main/java/org/apache/joshua/decoder/LanguageModelStateManager.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/LanguageModelStateManager.java b/src/main/java/org/apache/joshua/decoder/LanguageModelStateManager.java
index 6a3c4b3..82c3212 100644
--- a/src/main/java/org/apache/joshua/decoder/LanguageModelStateManager.java
+++ b/src/main/java/org/apache/joshua/decoder/LanguageModelStateManager.java
@@ -11,10 +11,10 @@ import java.util.UUID;
  */
 public class LanguageModelStateManager {
 
-  private Map<UUID, KenLMPool> languageModelPoolMapping = new HashMap<>();
+  private Map<UUID, LmPool> languageModelPoolMapping = new HashMap<>();
 
-  public KenLMPool getStatePool(UUID languageModelId, KenLM languageModel) {
-    KenLMPool statePool = languageModelPoolMapping.get(languageModelId);
+  public LmPool getStatePool(UUID languageModelId, KenLM languageModel) {
+    LmPool statePool = languageModelPoolMapping.get(languageModelId);
     if (statePool == null) {
       statePool = languageModel.createLMPool();
       languageModelPoolMapping.put(languageModelId, statePool);
@@ -23,7 +23,7 @@ public class LanguageModelStateManager {
   }
 
   public void clearStatePool() {
-    languageModelPoolMapping.values().forEach(KenLMPool::close);
+    languageModelPoolMapping.values().forEach(LmPool::close);
     languageModelPoolMapping.clear();
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e9f4f5b1/src/main/java/org/apache/joshua/decoder/LmPool.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/LmPool.java b/src/main/java/org/apache/joshua/decoder/LmPool.java
new file mode 100644
index 0000000..73af0a6
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/LmPool.java
@@ -0,0 +1,70 @@
+package org.apache.joshua.decoder;
+
+import org.apache.joshua.decoder.ff.lm.KenLM;
+
+import java.nio.ByteBuffer;
+
+import static org.apache.joshua.util.Constants.LONG_SIZE_IN_BYTES;
+
+/**
+ * Class to wrap a KenLM pool of states.  This class is not ThreadSafe.  It should be
+ * used in a scoped context, and close must be called to release native resources.  It
+ * does implement a custom finalizer that will release these resources if needed, but
+ * this should not be relied on.
+ *
+ * @author Kellen Sunderland
+ */
+
+public abstract class LmPool implements AutoCloseable {
+
+  private final long pool;
+  private final KenLM languageModel;
+  private final ByteBuffer ngramBuffer;
+  private boolean released = false;
+
+  public LmPool(long pool, KenLM languageModel, ByteBuffer ngramBuffer) {
+    this.pool = pool;
+    this.languageModel = languageModel;
+    this.ngramBuffer = ngramBuffer;
+  }
+
+  public long getPool() {
+    return pool;
+  }
+
+  @Override
+  protected void finalize() throws Throwable {
+    close();
+    super.finalize();
+  }
+
+  @Override
+  public synchronized void close() {
+    if (!released) {
+      released = true;
+      languageModel.destroyLMPool(pool);
+    }
+  }
+
+  /**
+   * Write a single id to the KenLM shared buffer.
+   * Note: This method must be used in conjunction with setBufferLength.
+   *
+   * @param index index at which to write id.
+   * @param id    word id to write.
+   */
+  public void writeIdToBuffer(int index, long id) {
+    this.ngramBuffer.putLong((index + 1) * LONG_SIZE_IN_BYTES, id);
+  }
+
+  /**
+   * Manually set the length of the ngram array to be used when calling probRule or estimate on
+   * KenLM.
+   * Note: Must be used if you are calling writeIdToBuffer.
+   *
+   * @param length The size of the array of ngrams you would like to use with probRule or estimate.
+   */
+  public void setBufferLength(long length) {
+    ngramBuffer.putLong(0, length);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e9f4f5b1/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java b/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
index d138495..98bba83 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
@@ -18,8 +18,9 @@
  */
 package org.apache.joshua.decoder.ff.lm;
 
+import javafx.scene.Parent;
 import org.apache.joshua.corpus.Vocabulary;
-import org.apache.joshua.decoder.KenLMPool;
+import org.apache.joshua.decoder.LmPool;
 import org.apache.joshua.decoder.ff.state_maintenance.KenLMState;
 import org.apache.joshua.util.FormatUtils;
 import org.slf4j.Logger;
@@ -108,11 +109,11 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
     }
   }
 
-  public KenLMPool createLMPool() {
+  public LmPool createLMPool() {
     ByteBuffer ngramBuffer = ByteBuffer.allocateDirect(MAX_TARGET_LENGTH * LONG_SIZE_IN_BYTES);
     ngramBuffer.order(LITTLE_ENDIAN);
     long pool = createPool(ngramBuffer);
-    return new KenLMPool(pool, this, ngramBuffer);
+    return new KenLMPool(pool, ngramBuffer);
   }
 
   public void destroyLMPool(long pointer) {
@@ -164,7 +165,7 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
    * @return the updated {@link org.apache.joshua.decoder.ff.lm.KenLM.StateProbPair} e.g.
    * KenLM state and the LM probability incurred along this rule
    */
-  public StateProbPair probRule(long[] words, KenLMPool poolWrapper) {
+  public StateProbPair probRule(long[] words, LmPool poolWrapper) {
 
     poolWrapper.setBufferLength(words.length);
     for (int i = 0; i < words.length; i++) {
@@ -271,4 +272,9 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
     }
   }
 
+  private class KenLMPool extends LmPool {
+    protected KenLMPool(long pool, ByteBuffer ngramBuffer) {
+      super(pool, KenLM.this, ngramBuffer);
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e9f4f5b1/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java b/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
index c3281d6..f15db05 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
@@ -25,7 +25,7 @@ import java.util.UUID;
 
 import org.apache.joshua.corpus.Vocabulary;
 import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.decoder.KenLMPool;
+import org.apache.joshua.decoder.LmPool;
 import org.apache.joshua.decoder.chart_parser.SourcePath;
 import org.apache.joshua.decoder.ff.FeatureVector;
 import org.apache.joshua.decoder.ff.lm.KenLM.StateProbPair;
@@ -115,7 +115,7 @@ public class StateMinimizingLanguageModel extends LanguageModelFF {
      // map to ken lm ids
     final long[] words = mapToKenLmIds(ruleWords, tailNodes, false);
 
-    KenLMPool statePool = sentence.getStateManager().getStatePool(languageModelPoolId, (KenLM)
+    LmPool statePool = sentence.getStateManager().getStatePool(languageModelPoolId, (KenLM)
             languageModel);
 
     // Get the probability of applying the rule and the new state

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e9f4f5b1/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java b/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
index f84c41a..1d8712d 100644
--- a/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
+++ b/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
@@ -21,20 +21,16 @@ package org.apache.joshua.decoder.segment_file;
 import static org.apache.joshua.util.FormatUtils.addSentenceMarkers;
 
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
-import java.util.Map;
 import java.util.StringTokenizer;
-import java.util.UUID;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.apache.joshua.corpus.Vocabulary;
 import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.decoder.KenLMPool;
 import org.apache.joshua.decoder.LanguageModelStateManager;
 import org.apache.joshua.decoder.ff.tm.Grammar;
 import org.apache.joshua.lattice.Arc;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e9f4f5b1/src/test/java/org/apache/joshua/system/KenLmTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/system/KenLmTest.java b/src/test/java/org/apache/joshua/system/KenLmTest.java
index 2d129f1..7c42485 100644
--- a/src/test/java/org/apache/joshua/system/KenLmTest.java
+++ b/src/test/java/org/apache/joshua/system/KenLmTest.java
@@ -19,7 +19,7 @@
 package org.apache.joshua.system;
 
 import org.apache.joshua.corpus.Vocabulary;
-import org.apache.joshua.decoder.KenLMPool;
+import org.apache.joshua.decoder.LmPool;
 import org.apache.joshua.decoder.ff.lm.KenLM;
 import org.apache.joshua.util.io.KenLmTestUtil;
 import org.testng.annotations.AfterMethod;
@@ -99,7 +99,7 @@ public class KenLmTest {
 
     // WHEN
     KenLM.StateProbPair result;
-    try (KenLMPool poolPointer = kenLm.createLMPool()) {
+    try (LmPool poolPointer = kenLm.createLMPool()) {
       result = kenLm.probRule(longIds, poolPointer);
     }

[2/6] incubator-joshua git commit: Converted estimateRule to also make use of DirectBuffer. Reduced number of array copies in probRule. Removed sentence from estimate method signature (as it was unused). Created an abstraction in the KenLMPool class to hide details of underlying ByteBuffer Indexing.

Posted by mj...@apache.org.

Converted estimateRule to also make use of DirectBuffer.
Reduced number of array copies in probRule.
Removed sentence from estimate method signature (as it was unused).
Created an abstraction in the KenLMPool class to hide details of underlying ByteBuffer Indexing.
Fixed Test givenKenLm_whenQueryingWithState_thenStateAndProbReturned


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/c8d8a65b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/c8d8a65b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/c8d8a65b

Branch: refs/heads/master
Commit: c8d8a65b9352e51e777965994dae7f9337b08def
Parents: 9ea7eeb
Author: Kellen Sunderland <ke...@amazon.com>
Authored: Thu Sep 15 19:31:21 2016 +0200
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Tue Sep 20 13:26:27 2016 +0200

----------------------------------------------------------------------
 jni/kenlm_wrap.cc                               | 19 ++++----
 .../org/apache/joshua/decoder/KenLMPool.java    | 26 ++++++++--
 .../joshua/decoder/ff/FeatureFunction.java      |  2 +-
 .../apache/joshua/decoder/ff/OOVPenalty.java    |  2 +-
 .../apache/joshua/decoder/ff/PhraseModel.java   |  2 +-
 .../apache/joshua/decoder/ff/PhrasePenalty.java |  2 +-
 .../apache/joshua/decoder/ff/StatelessFF.java   |  2 +-
 .../apache/joshua/decoder/ff/TargetBigram.java  |  2 +-
 .../apache/joshua/decoder/ff/WordPenalty.java   |  2 +-
 .../decoder/ff/fragmentlm/FragmentLMFF.java     |  2 +-
 .../org/apache/joshua/decoder/ff/lm/KenLM.java  | 25 +++-------
 .../joshua/decoder/ff/lm/LanguageModelFF.java   |  2 +-
 .../ff/lm/StateMinimizingLanguageModel.java     | 51 +++++++++++---------
 .../ff/similarity/EdgePhraseSimilarityFF.java   |  2 +-
 .../org/apache/joshua/decoder/ff/tm/Rule.java   |  2 +-
 .../java/org/apache/joshua/util/Constants.java  |  1 +
 .../org/apache/joshua/system/KenLmTest.java     | 16 +++---
 17 files changed, 90 insertions(+), 70 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/jni/kenlm_wrap.cc
----------------------------------------------------------------------
diff --git a/jni/kenlm_wrap.cc b/jni/kenlm_wrap.cc
index 0f3148a..8947a61 100644
--- a/jni/kenlm_wrap.cc
+++ b/jni/kenlm_wrap.cc
@@ -148,7 +148,7 @@ public:
   virtual float ProbString(jint * const begin, jint * const end,
       jint start) const = 0;
 
-  virtual float EstimateRule(jlong *begin, jlong *end) const = 0;
+  virtual float EstimateRule(const Chart &chart) const = 0;
 
   virtual uint8_t Order() const = 0;
 
@@ -229,7 +229,12 @@ public:
     return ruleScore.Finish();
   }
 
-  float EstimateRule(jlong * const begin, jlong * const end) const {
+  float EstimateRule(const Chart &chart) const {
+
+    // By convention the first long in the ngramBuffer denotes the size of the buffer
+    long* begin = chart.ngramBuffer_ + 1;
+    long* end = begin + *chart.ngramBuffer_;
+
     if (begin == end) return 0.0;
     lm::ngram::ChartState nullState;
     lm::ngram::RuleScore<Model> ruleScore(m_, nullState);
@@ -472,15 +477,11 @@ JNIEXPORT jlong JNICALL Java_org_apache_joshua_decoder_ff_lm_KenLM_probRule(
 }
 
 JNIEXPORT jfloat JNICALL Java_org_apache_joshua_decoder_ff_lm_KenLM_estimateRule(
-  JNIEnv *env, jclass, jlong pointer, jlongArray arr) {
-  jint length = env->GetArrayLength(arr);
-  // GCC only.
-  jlong values[length];
-  env->GetLongArrayRegion(arr, 0, length, values);
+  JNIEnv *env, jclass, jlong pointer, jlong chartPtr) {
 
   // Compute the probability
-  return reinterpret_cast<const VirtualBase*>(pointer)->EstimateRule(values,
-      values + length);
+  Chart* chart = reinterpret_cast<Chart*>(chartPtr);
+  return reinterpret_cast<const VirtualBase*>(pointer)->EstimateRule(*chart);
 }
 
 } // extern

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/KenLMPool.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/KenLMPool.java b/src/main/java/org/apache/joshua/decoder/KenLMPool.java
index a1e709b..48aa912 100644
--- a/src/main/java/org/apache/joshua/decoder/KenLMPool.java
+++ b/src/main/java/org/apache/joshua/decoder/KenLMPool.java
@@ -4,6 +4,8 @@ import org.apache.joshua.decoder.ff.lm.KenLM;
 
 import java.nio.ByteBuffer;
 
+import static org.apache.joshua.util.Constants.LONG_SIZE_IN_BYTES;
+
 /**
  * Class to wrap a KenLM pool of states.  This class is not ThreadSafe.  It should be
  * used in a scoped context, and close must be called to release native resources.  It
@@ -37,14 +39,32 @@ public class KenLMPool implements AutoCloseable {
   }
 
   @Override
-  public void close() {
+  public synchronized void close() {
     if (!released) {
       released = true;
       languageModel.destroyLMPool(pool);
     }
   }
 
-  public ByteBuffer getNgramBuffer() {
-    return ngramBuffer;
+  /**
+   * Write a single id to the KenLM shared buffer.
+   * Note: This method must be used in conjunction with setBufferLength.
+   *
+   * @param index index at which to write id.
+   * @param id    word id to write.
+   */
+  public void writeIdToBuffer(int index, long id) {
+    this.ngramBuffer.putLong((index + 1) * LONG_SIZE_IN_BYTES, id);
+  }
+
+  /**
+   * Manually set the length of the ngram array to be used when calling probRule or estimate on
+   * KenLM.
+   * Note: Must be used if you are calling writeIdToBuffer.
+   *
+   * @param length The size of the array of ngrams you would like to use with probRule or estimate.
+   */
+  public void setBufferLength(long length) {
+    ngramBuffer.putLong(0, length);
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java b/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
index 8b17326..340af0e 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
@@ -253,7 +253,7 @@ public abstract class FeatureFunction {
    * @param sentence {@link org.apache.joshua.lattice.Lattice} input
    * @return the *weighted* cost of applying the feature.
    */
-  public abstract float estimateCost(Rule rule, Sentence sentence);
+  public abstract float estimateCost(Rule rule);
 
   /**
    * This feature is called to produce a *weighted estimate* of the future cost

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java b/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
index 6f37fa4..3d851c5 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
@@ -94,7 +94,7 @@ public class OOVPenalty extends StatelessFF {
    * Important! estimateCost returns the *weighted* feature value.
    */
   @Override
-  public float estimateCost(Rule rule, Sentence sentence) {
+  public float estimateCost(Rule rule) {
     if (rule != null && this.ownerID.equals(rule.getOwner()))
       return weights.getDense(denseFeatureIndex) * getValue(rule.getLHS());
     return 0.0f;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java b/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
index bd490dc..e11051a 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
@@ -85,7 +85,7 @@ public class PhraseModel extends StatelessFF {
    * functions.
    */
   @Override
-  public float estimateCost(final Rule rule, Sentence sentence) {
+  public float estimateCost(final Rule rule) {
 
     if (rule != null && rule.getOwner().equals(ownerID)) {
       if (rule.getPrecomputableCost() <= Float.NEGATIVE_INFINITY)

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java b/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
index 2643729..0cfd28f 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
@@ -78,7 +78,7 @@ public class PhrasePenalty extends StatelessFF {
    * 
    */
   @Override
-  public float estimateCost(Rule rule, Sentence sentence) {
+  public float estimateCost(Rule rule) {
     if (rule != null && rule != Hypothesis.BEGIN_RULE && rule != Hypothesis.END_RULE 
         && (rule.getOwner().equals(owner)))
       return weights.getDense(denseFeatureIndex) * value;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java b/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java
index e473c37..86a36c2 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java
@@ -49,7 +49,7 @@ public abstract class StatelessFF extends FeatureFunction {
    * The estimated cost of applying this feature, given only the rule. This is used in sorting the
    * rules for cube pruning. For most features, this will be 0.0.
    */
-  public float estimateCost(Rule rule, Sentence sentence) {
+  public float estimateCost(Rule rule) {
     return 0.0f;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java b/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
index a9264e0..cb5c94d 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
@@ -191,7 +191,7 @@ public class TargetBigram extends StatefulFF {
    * TargetBigram features are only computed across hyperedges, so there is nothing to be done here.
    */
   @Override
-  public float estimateCost(Rule rule, Sentence sentence) {
+  public float estimateCost(Rule rule) {
     return 0.0f;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java b/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java
index da315ec..bd51be3 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java
@@ -72,7 +72,7 @@ public final class WordPenalty extends StatelessFF {
   }
 
   @Override
-  public float estimateCost(Rule rule, Sentence sentence) {
+  public float estimateCost(Rule rule) {
     if (rule != null)
       return weights.getDense(denseFeatureIndex) * OMEGA * (rule.getEnglish().length - rule.getArity());
     return 0.0f;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
index 7388262..944f5f2 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
@@ -278,7 +278,7 @@ public class FragmentLMFF extends StatefulFF {
   }
 
   @Override
-  public float estimateCost(Rule rule, Sentence sentence) {
+  public float estimateCost(Rule rule) {
     // TODO Auto-generated method stub
     return 0;
   }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java b/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
index e8a9f0f..df16019 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
@@ -21,6 +21,7 @@ package org.apache.joshua.decoder.ff.lm;
 import org.apache.joshua.corpus.Vocabulary;
 import org.apache.joshua.decoder.KenLMPool;
 import org.apache.joshua.decoder.ff.state_maintenance.KenLMState;
+import org.apache.joshua.util.Constants;
 import org.apache.joshua.util.FormatUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -39,8 +40,6 @@ import java.nio.ByteBuffer;
 
 public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
 
-  private static final int LONG_SIZE_IN_BYTES = Long.SIZE / 8;
-
   private static final Logger LOG = LoggerFactory.getLogger(KenLM.class);
 
   // Maximum number of terminal and non-terminal symbols on a rule's target side
@@ -89,7 +88,7 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
 
   private static native long probRule(long ptr, long pool);
 
-  private static native float estimateRule(long ptr, long words[]);
+  private static native float estimateRule(long ptr, long poolWrapper);
 
   private static native float probString(long ptr, int words[], int start);
 
@@ -108,7 +107,8 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
   }
 
   public KenLMPool createLMPool() {
-    ByteBuffer ngramBuffer = ByteBuffer.allocateDirect(MAX_TARGET_LENGTH * LONG_SIZE_IN_BYTES);
+    ByteBuffer ngramBuffer = ByteBuffer.allocateDirect(MAX_TARGET_LENGTH *
+            Constants.LONG_SIZE_IN_BYTES);
     ngramBuffer.order(java.nio.ByteOrder.LITTLE_ENDIAN);
     long pool = createPool(ngramBuffer);
     return new KenLMPool(pool, this, ngramBuffer);
@@ -158,18 +158,11 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
    * needed so KenLM knows which memory pool to use. When finished, it returns the updated KenLM
    * state and the LM probability incurred along this rule.
    *
-   * @param words       array of words
    * @param poolWrapper an object that wraps a pool reference returned from KenLM createPool
    * @return the updated {@link org.apache.joshua.decoder.ff.lm.KenLM.StateProbPair} e.g.
    * KenLM state and the LM probability incurred along this rule
    */
-  public StateProbPair probRule(long[] words, KenLMPool poolWrapper) {
-
-    poolWrapper.getNgramBuffer().putLong(0, words.length);
-    for (int i = 0; i < words.length; i++) {
-      poolWrapper.getNgramBuffer().putLong((i + 1) * LONG_SIZE_IN_BYTES, words[i]);
-    }
-
+  public StateProbPair probRule(KenLMPool poolWrapper) {
     long packedResult = probRule(pointer, poolWrapper.getPool());
     int state = (int) (packedResult >> 32);
     float probVal = Float.intBitsToFloat((int) packedResult);
@@ -181,13 +174,12 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
    * Public facing function that estimates the cost of a rule, which value is used for sorting
    * rules during cube pruning.
    *
-   * @param words array of words
    * @return the estimated cost of the rule (the (partial) n-gram probabilities of all words in the rule)
    */
-  public float estimateRule(long[] words) {
+  public float estimateRule(KenLMPool poolWrapper) {
     float estimate = 0.0f;
     try {
-      estimate = estimateRule(pointer, words);
+      estimate = estimateRule(pointer, poolWrapper.getPool());
     } catch (NoSuchMethodError e) {
       throw new RuntimeException(e);
     }
@@ -257,8 +249,7 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
   }
 
   /**
-   * Inner class used to hold the results returned from KenLM with left-state minimization. Note
-   * that inner classes have to be static to be accessible from the JNI!
+   * Inner class used to hold the results returned from KenLM with left-state minimization.
    */
   public static class StateProbPair {
     public KenLMState state = null;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java b/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
index f5c1cb5..a29c754 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
@@ -325,7 +325,7 @@ public class LanguageModelFF extends StatefulFF {
    * n-grams on the left-hand side.
    */
   @Override
-  public float estimateCost(Rule rule, Sentence sentence) {
+  public float estimateCost(Rule rule) {
 
     float lmEstimate = 0.0f;
     boolean considerIncompleteNgrams = true;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java b/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
index 2219ce8..155522b 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
@@ -72,17 +72,19 @@ public class StateMinimizingLanguageModel extends LanguageModelFF {
    * efficiently than the default {@link LanguageModelFF} class.
    */
   @Override
-  public float estimateCost(Rule rule, Sentence sentence) {
+  public float estimateCost(Rule rule) {
 
     int[] ruleWords = getRuleIds(rule);
 
-    // map to ken lm ids
-    final long[] words = mapToKenLmIds(ruleWords, null, true);
-    
-    // Get the probability of applying the rule and the new state
-    float lmCost = weight * ((KenLM) languageModel).estimateRule(words);
-    float oovCost = oovWeight * ((withOovFeature) ? getOovs(ruleWords) : 0f);
-    return lmCost + oovCost;
+    try(KenLMPool poolWrapper = ((KenLM)languageModel).createLMPool();) {
+      // Write KenLM word ids to a shared ByteBuffer.
+      writeKenLmIds(ruleWords, null, poolWrapper);
+
+      // Get the probability of applying the rule and the new state
+      float lmCost = weight * ((KenLM)languageModel).estimateRule(poolWrapper);
+      float oovCost = oovWeight * ((withOovFeature) ? getOovs(ruleWords) : 0f);
+      return lmCost + oovCost;
+    }
   }
 
   private UUID languageModelPoolId = UUID.randomUUID();
@@ -101,7 +103,7 @@ public class StateMinimizingLanguageModel extends LanguageModelFF {
 
     int[] ruleWords;
     if (config.source_annotations) {
-      // get source side annotations and project them to the target side
+      // Get source side annotations and project them to the target side
       ruleWords = getTags(rule, i, j, sentence);
     } else {
       ruleWords = getRuleIds(rule);
@@ -112,14 +114,16 @@ public class StateMinimizingLanguageModel extends LanguageModelFF {
       acc.add(oovDenseFeatureIndex, getOovs(ruleWords));
     }
 
-     // map to ken lm ids
-    final long[] words = mapToKenLmIds(ruleWords, tailNodes, false);
+    KenLMPool statePool = sentence.getStateManager().getStatePool(languageModelPoolId,
+            (KenLM)languageModel);
+
+     // Write KenLM ngram ids to the shared direct buffer
+    writeKenLmIds(ruleWords, tailNodes, statePool);
+
 
-    KenLMPool statePool = sentence.getStateManager().getStatePool(languageModelPoolId, (KenLM)
-            languageModel);
 
     // Get the probability of applying the rule and the new state
-    final StateProbPair pair = ((KenLM) languageModel).probRule(words, statePool);
+    final StateProbPair pair = ((KenLM)languageModel).probRule(statePool);
 
     // Record the prob
     acc.add(denseFeatureIndex, pair.prob);
@@ -131,31 +135,34 @@ public class StateMinimizingLanguageModel extends LanguageModelFF {
   /**
    * Maps given array of word/class ids to KenLM ids. For estimating cost and computing,
    * state retrieval differs slightly.
+   *
+   * When used for estimation tailNodes may be null.
    */
-  private long[] mapToKenLmIds(int[] ids, List<HGNode> tailNodes, boolean isOnlyEstimate) {
+  private void writeKenLmIds(int[] ids, List<HGNode> tailNodes, KenLMPool poolWrapper) {
+
+    poolWrapper.setBufferLength(ids.length);
+
     // The IDs we will pass to KenLM
-    long[] kenIds = new long[ids.length];
     for (int x = 0; x < ids.length; x++) {
       int id = ids[x];
 
       if (isNonterminal(id)) {
 
-        if (isOnlyEstimate) {
-          // For the estimate, we can just mark negative values
-          kenIds[x] = -1;
+        if (tailNodes == null) {
+          // For the estimation, we can just mark negative values
+          poolWrapper.writeIdToBuffer(x, -1);
         } else {
           // Nonterminal: retrieve the KenLM long that records the state
           int index = -(id + 1);
           final KenLMState state = (KenLMState) tailNodes.get(index).getDPState(stateIndex);
-          kenIds[x] = -state.getState();
+          poolWrapper.writeIdToBuffer(x, -state.getState());
         }
 
       } else {
         // Terminal: just add it
-        kenIds[x] = id;
+        poolWrapper.writeIdToBuffer(x, id);
       }
     }
-    return kenIds;
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java b/src/main/java/org/apache/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java
index e5dcbf9..93dab1d 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java
@@ -205,7 +205,7 @@ public class EdgePhraseSimilarityFF extends StatefulFF implements SourceDependen
   }
 
   @Override
-  public float estimateCost(Rule rule, Sentence sentence) {
+  public float estimateCost(Rule rule) {
     return 0.0f;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java b/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
index 0902c5a..6b3e37b 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
@@ -377,7 +377,7 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
 
 //      LOG.debug("estimateCost({} ;; {})", getFrenchWords(), getEnglishWords());
       for (FeatureFunction ff : models) {
-        float val = ff.estimateCost(this, null);
+        float val = ff.estimateCost(this);
 //        LOG.debug("  FEATURE {} -> {}", ff.getName(), val);
         this.estimatedCost += val; 
       }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/main/java/org/apache/joshua/util/Constants.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/util/Constants.java b/src/main/java/org/apache/joshua/util/Constants.java
index 9612a35..b70f6f2 100644
--- a/src/main/java/org/apache/joshua/util/Constants.java
+++ b/src/main/java/org/apache/joshua/util/Constants.java
@@ -33,4 +33,5 @@ public final class Constants {
 
   public static final String fieldDelimiter = "\\s\\|{3}\\s";
   public static final String spaceSeparator = "\\s+";
+  public static final int LONG_SIZE_IN_BYTES = Long.SIZE / 8;
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c8d8a65b/src/test/java/org/apache/joshua/system/KenLmTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/system/KenLmTest.java b/src/test/java/org/apache/joshua/system/KenLmTest.java
index 9f26f8f..38d6fcc 100644
--- a/src/test/java/org/apache/joshua/system/KenLmTest.java
+++ b/src/test/java/org/apache/joshua/system/KenLmTest.java
@@ -90,23 +90,23 @@ public class KenLmTest {
     registerLanguageModel(kenLm);
     String sentence = "Wayne Gretzky";
     String[] words = sentence.split("\\s+");
-    int[] ids = Vocabulary.addAll(sentence);
-    long[] longIds = new long[ids.length];
-
-    for (int i = 0; i < words.length; i++) {
-      longIds[i] = ids[i];
-    }
+    Vocabulary.addAll(sentence);
 
     // WHEN
     KenLM.StateProbPair result;
     try (KenLMPool poolPointer = kenLm.createLMPool()) {
-      result = kenLm.probRule(longIds, poolPointer);
+
+      poolPointer.setBufferLength(words.length);
+      for(int i =0; i< words.length; i++) {
+        poolPointer.writeIdToBuffer(i, Vocabulary.id(words[i]));
+      }
+      result = kenLm.probRule(poolPointer);
     }
 
     // THEN
     assertThat(result, is(notNullValue()));
     assertThat(result.state.getState(), is(1L));
-    assertThat(result.prob, is(-3.7906885f));
+    assertThat(result.prob, is(-3.7906885F));
   }
 
   @Test

[5/6] incubator-joshua git commit: Merge branch 'DirectBuffersRemoveEst' of https://github.com/KellenSunderland/incubator-joshua

Posted by mj...@apache.org.

Merge branch 'DirectBuffersRemoveEst' of https://github.com/KellenSunderland/incubator-joshua


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/553e219d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/553e219d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/553e219d

Branch: refs/heads/master
Commit: 553e219db9cdde9f9044cdde72acf30ed6dc7176
Parents: f5492d3 e9f4f5b
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Sep 28 08:44:27 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed Sep 28 08:44:27 2016 -0400

----------------------------------------------------------------------
 jni/kenlm_wrap.cc                               |  39 +++---
 .../org/apache/joshua/decoder/KenLMPool.java    |  42 -------
 .../decoder/LanguageModelStateManager.java      |   8 +-
 .../java/org/apache/joshua/decoder/LmPool.java  |  70 +++++++++++
 .../joshua/decoder/ff/FeatureFunction.java      |   2 +-
 .../apache/joshua/decoder/ff/OOVPenalty.java    |   2 +-
 .../apache/joshua/decoder/ff/PhraseModel.java   |   2 +-
 .../apache/joshua/decoder/ff/PhrasePenalty.java |   2 +-
 .../apache/joshua/decoder/ff/StatelessFF.java   |   2 +-
 .../apache/joshua/decoder/ff/TargetBigram.java  |   2 +-
 .../apache/joshua/decoder/ff/WordPenalty.java   |   2 +-
 .../decoder/ff/fragmentlm/FragmentLMFF.java     |   2 +-
 .../org/apache/joshua/decoder/ff/lm/KenLM.java  | 121 +++++++++++--------
 .../joshua/decoder/ff/lm/LanguageModelFF.java   |   2 +-
 .../ff/lm/StateMinimizingLanguageModel.java     |   6 +-
 .../ff/similarity/EdgePhraseSimilarityFF.java   |   2 +-
 .../org/apache/joshua/decoder/ff/tm/Rule.java   |   2 +-
 .../joshua/decoder/segment_file/Sentence.java   |   4 -
 .../java/org/apache/joshua/util/Constants.java  |   1 +
 .../org/apache/joshua/system/KenLmTest.java     |   6 +-
 20 files changed, 186 insertions(+), 133 deletions(-)
----------------------------------------------------------------------