You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/28 12:45:48 UTC

[4/6] incubator-joshua git commit: Explicitly bind KenLMs to LmPool objects

Explicitly bind KenLMs to LmPool objects


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/e9f4f5b1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/e9f4f5b1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/e9f4f5b1

Branch: refs/heads/master
Commit: e9f4f5b1468364a658f90c168e2b8ec69c3fa48e
Parents: d9c3d7e
Author: Kellen Sunderland <ke...@amazon.com>
Authored: Tue Sep 27 18:29:50 2016 +0200
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Tue Sep 27 18:29:50 2016 +0200

----------------------------------------------------------------------
 .../org/apache/joshua/decoder/KenLMPool.java    | 70 --------------------
 .../decoder/LanguageModelStateManager.java      |  8 +--
 .../java/org/apache/joshua/decoder/LmPool.java  | 70 ++++++++++++++++++++
 .../org/apache/joshua/decoder/ff/lm/KenLM.java  | 14 ++--
 .../ff/lm/StateMinimizingLanguageModel.java     |  4 +-
 .../joshua/decoder/segment_file/Sentence.java   |  4 --
 .../org/apache/joshua/system/KenLmTest.java     |  4 +-
 7 files changed, 88 insertions(+), 86 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e9f4f5b1/src/main/java/org/apache/joshua/decoder/KenLMPool.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/KenLMPool.java b/src/main/java/org/apache/joshua/decoder/KenLMPool.java
deleted file mode 100644
index 48aa912..0000000
--- a/src/main/java/org/apache/joshua/decoder/KenLMPool.java
+++ /dev/null
@@ -1,70 +0,0 @@
-package org.apache.joshua.decoder;
-
-import org.apache.joshua.decoder.ff.lm.KenLM;
-
-import java.nio.ByteBuffer;
-
-import static org.apache.joshua.util.Constants.LONG_SIZE_IN_BYTES;
-
-/**
- * Class to wrap a KenLM pool of states.  This class is not ThreadSafe.  It should be
- * used in a scoped context, and close must be called to release native resources.  It
- * does implement a custom finalizer that will release these resources if needed, but
- * this should not be relied on.
- *
- * @author Kellen Sunderland
- */
-
-public class KenLMPool implements AutoCloseable {
-
-  private final long pool;
-  private final KenLM languageModel;
-  private final ByteBuffer ngramBuffer;
-  private boolean released = false;
-
-  public KenLMPool(long pool, KenLM languageModel, ByteBuffer ngramBuffer) {
-    this.pool = pool;
-    this.languageModel = languageModel;
-    this.ngramBuffer = ngramBuffer;
-  }
-
-  public long getPool() {
-    return pool;
-  }
-
-  @Override
-  protected void finalize() throws Throwable {
-    close();
-    super.finalize();
-  }
-
-  @Override
-  public synchronized void close() {
-    if (!released) {
-      released = true;
-      languageModel.destroyLMPool(pool);
-    }
-  }
-
-  /**
-   * Write a single id to the KenLM shared buffer.
-   * Note: This method must be used in conjunction with setBufferLength.
-   *
-   * @param index index at which to write id.
-   * @param id    word id to write.
-   */
-  public void writeIdToBuffer(int index, long id) {
-    this.ngramBuffer.putLong((index + 1) * LONG_SIZE_IN_BYTES, id);
-  }
-
-  /**
-   * Manually set the length of the ngram array to be used when calling probRule or estimate on
-   * KenLM.
-   * Note: Must be used if you are calling writeIdToBuffer.
-   *
-   * @param length The size of the array of ngrams you would like to use with probRule or estimate.
-   */
-  public void setBufferLength(long length) {
-    ngramBuffer.putLong(0, length);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e9f4f5b1/src/main/java/org/apache/joshua/decoder/LanguageModelStateManager.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/LanguageModelStateManager.java b/src/main/java/org/apache/joshua/decoder/LanguageModelStateManager.java
index 6a3c4b3..82c3212 100644
--- a/src/main/java/org/apache/joshua/decoder/LanguageModelStateManager.java
+++ b/src/main/java/org/apache/joshua/decoder/LanguageModelStateManager.java
@@ -11,10 +11,10 @@ import java.util.UUID;
  */
 public class LanguageModelStateManager {
 
-  private Map<UUID, KenLMPool> languageModelPoolMapping = new HashMap<>();
+  private Map<UUID, LmPool> languageModelPoolMapping = new HashMap<>();
 
-  public KenLMPool getStatePool(UUID languageModelId, KenLM languageModel) {
-    KenLMPool statePool = languageModelPoolMapping.get(languageModelId);
+  public LmPool getStatePool(UUID languageModelId, KenLM languageModel) {
+    LmPool statePool = languageModelPoolMapping.get(languageModelId);
     if (statePool == null) {
       statePool = languageModel.createLMPool();
       languageModelPoolMapping.put(languageModelId, statePool);
@@ -23,7 +23,7 @@ public class LanguageModelStateManager {
   }
 
   public void clearStatePool() {
-    languageModelPoolMapping.values().forEach(KenLMPool::close);
+    languageModelPoolMapping.values().forEach(LmPool::close);
     languageModelPoolMapping.clear();
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e9f4f5b1/src/main/java/org/apache/joshua/decoder/LmPool.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/LmPool.java b/src/main/java/org/apache/joshua/decoder/LmPool.java
new file mode 100644
index 0000000..73af0a6
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/LmPool.java
@@ -0,0 +1,70 @@
+package org.apache.joshua.decoder;
+
+import org.apache.joshua.decoder.ff.lm.KenLM;
+
+import java.nio.ByteBuffer;
+
+import static org.apache.joshua.util.Constants.LONG_SIZE_IN_BYTES;
+
+/**
+ * Class to wrap a KenLM pool of states.  This class is not thread-safe.  It should be
+ * used in a scoped context, and close() must be called to release native resources.  It
+ * also implements a finalizer that releases these resources as a fallback, but
+ * this should not be relied on.
+ *
+ * @author Kellen Sunderland
+ */
+
+public abstract class LmPool implements AutoCloseable {
+
+  private final long pool;
+  private final KenLM languageModel;
+  private final ByteBuffer ngramBuffer;
+  private boolean released = false;
+
+  public LmPool(long pool, KenLM languageModel, ByteBuffer ngramBuffer) {
+    this.pool = pool;
+    this.languageModel = languageModel;
+    this.ngramBuffer = ngramBuffer;
+  }
+
+  public long getPool() {
+    return pool;
+  }
+
+  @Override
+  protected void finalize() throws Throwable {
+    close();
+    super.finalize();
+  }
+
+  @Override
+  public synchronized void close() {
+    if (!released) {
+      released = true;
+      languageModel.destroyLMPool(pool);
+    }
+  }
+
+  /**
+   * Write a single id to the KenLM shared buffer.
+   * Note: This method must be used in conjunction with setBufferLength.
+   *
+   * @param index index at which to write id.
+   * @param id    word id to write.
+   */
+  public void writeIdToBuffer(int index, long id) {
+    this.ngramBuffer.putLong((index + 1) * LONG_SIZE_IN_BYTES, id);
+  }
+
+  /**
+   * Manually set the length of the ngram array to be used when calling probRule or estimate on
+   * KenLM.
+   * Note: Must be used if you are calling writeIdToBuffer.
+   *
+   * @param length The size of the array of ngrams you would like to use with probRule or estimate.
+   */
+  public void setBufferLength(long length) {
+    ngramBuffer.putLong(0, length);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e9f4f5b1/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java b/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
index d138495..98bba83 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
@@ -18,8 +18,9 @@
  */
 package org.apache.joshua.decoder.ff.lm;
 
+import javafx.scene.Parent;
 import org.apache.joshua.corpus.Vocabulary;
-import org.apache.joshua.decoder.KenLMPool;
+import org.apache.joshua.decoder.LmPool;
 import org.apache.joshua.decoder.ff.state_maintenance.KenLMState;
 import org.apache.joshua.util.FormatUtils;
 import org.slf4j.Logger;
@@ -108,11 +109,11 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
     }
   }
 
-  public KenLMPool createLMPool() {
+  public LmPool createLMPool() {
     ByteBuffer ngramBuffer = ByteBuffer.allocateDirect(MAX_TARGET_LENGTH * LONG_SIZE_IN_BYTES);
     ngramBuffer.order(LITTLE_ENDIAN);
     long pool = createPool(ngramBuffer);
-    return new KenLMPool(pool, this, ngramBuffer);
+    return new KenLMPool(pool, ngramBuffer);
   }
 
   public void destroyLMPool(long pointer) {
@@ -164,7 +165,7 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
    * @return the updated {@link org.apache.joshua.decoder.ff.lm.KenLM.StateProbPair} e.g.
    * KenLM state and the LM probability incurred along this rule
    */
-  public StateProbPair probRule(long[] words, KenLMPool poolWrapper) {
+  public StateProbPair probRule(long[] words, LmPool poolWrapper) {
 
     poolWrapper.setBufferLength(words.length);
     for (int i = 0; i < words.length; i++) {
@@ -271,4 +272,9 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
     }
   }
 
+  private class KenLMPool extends LmPool {
+    protected KenLMPool(long pool, ByteBuffer ngramBuffer) {
+      super(pool, KenLM.this, ngramBuffer);
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e9f4f5b1/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java b/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
index c3281d6..f15db05 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
@@ -25,7 +25,7 @@ import java.util.UUID;
 
 import org.apache.joshua.corpus.Vocabulary;
 import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.decoder.KenLMPool;
+import org.apache.joshua.decoder.LmPool;
 import org.apache.joshua.decoder.chart_parser.SourcePath;
 import org.apache.joshua.decoder.ff.FeatureVector;
 import org.apache.joshua.decoder.ff.lm.KenLM.StateProbPair;
@@ -115,7 +115,7 @@ public class StateMinimizingLanguageModel extends LanguageModelFF {
      // map to ken lm ids
     final long[] words = mapToKenLmIds(ruleWords, tailNodes, false);
 
-    KenLMPool statePool = sentence.getStateManager().getStatePool(languageModelPoolId, (KenLM)
+    LmPool statePool = sentence.getStateManager().getStatePool(languageModelPoolId, (KenLM)
             languageModel);
 
     // Get the probability of applying the rule and the new state

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e9f4f5b1/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java b/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
index f84c41a..1d8712d 100644
--- a/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
+++ b/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
@@ -21,20 +21,16 @@ package org.apache.joshua.decoder.segment_file;
 import static org.apache.joshua.util.FormatUtils.addSentenceMarkers;
 
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
-import java.util.Map;
 import java.util.StringTokenizer;
-import java.util.UUID;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.apache.joshua.corpus.Vocabulary;
 import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.decoder.KenLMPool;
 import org.apache.joshua.decoder.LanguageModelStateManager;
 import org.apache.joshua.decoder.ff.tm.Grammar;
 import org.apache.joshua.lattice.Arc;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e9f4f5b1/src/test/java/org/apache/joshua/system/KenLmTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/system/KenLmTest.java b/src/test/java/org/apache/joshua/system/KenLmTest.java
index 2d129f1..7c42485 100644
--- a/src/test/java/org/apache/joshua/system/KenLmTest.java
+++ b/src/test/java/org/apache/joshua/system/KenLmTest.java
@@ -19,7 +19,7 @@
 package org.apache.joshua.system;
 
 import org.apache.joshua.corpus.Vocabulary;
-import org.apache.joshua.decoder.KenLMPool;
+import org.apache.joshua.decoder.LmPool;
 import org.apache.joshua.decoder.ff.lm.KenLM;
 import org.apache.joshua.util.io.KenLmTestUtil;
 import org.testng.annotations.AfterMethod;
@@ -99,7 +99,7 @@ public class KenLmTest {
 
     // WHEN
     KenLM.StateProbPair result;
-    try (KenLMPool poolPointer = kenLm.createLMPool()) {
+    try (LmPool poolPointer = kenLm.createLMPool()) {
       result = kenLm.probRule(longIds, poolPointer);
     }