You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/28 12:45:48 UTC
[4/6] incubator-joshua git commit: Explicitly bind KenLMs to LmPool objects
Explicitly bind KenLMs to LmPool objects
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/e9f4f5b1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/e9f4f5b1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/e9f4f5b1
Branch: refs/heads/master
Commit: e9f4f5b1468364a658f90c168e2b8ec69c3fa48e
Parents: d9c3d7e
Author: Kellen Sunderland <ke...@amazon.com>
Authored: Tue Sep 27 18:29:50 2016 +0200
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Tue Sep 27 18:29:50 2016 +0200
----------------------------------------------------------------------
.../org/apache/joshua/decoder/KenLMPool.java | 70 --------------------
.../decoder/LanguageModelStateManager.java | 8 +--
.../java/org/apache/joshua/decoder/LmPool.java | 70 ++++++++++++++++++++
.../org/apache/joshua/decoder/ff/lm/KenLM.java | 14 ++--
.../ff/lm/StateMinimizingLanguageModel.java | 4 +-
.../joshua/decoder/segment_file/Sentence.java | 4 --
.../org/apache/joshua/system/KenLmTest.java | 4 +-
7 files changed, 88 insertions(+), 86 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e9f4f5b1/src/main/java/org/apache/joshua/decoder/KenLMPool.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/KenLMPool.java b/src/main/java/org/apache/joshua/decoder/KenLMPool.java
deleted file mode 100644
index 48aa912..0000000
--- a/src/main/java/org/apache/joshua/decoder/KenLMPool.java
+++ /dev/null
@@ -1,70 +0,0 @@
-package org.apache.joshua.decoder;
-
-import org.apache.joshua.decoder.ff.lm.KenLM;
-
-import java.nio.ByteBuffer;
-
-import static org.apache.joshua.util.Constants.LONG_SIZE_IN_BYTES;
-
-/**
- * Class to wrap a KenLM pool of states. This class is not ThreadSafe. It should be
- * used in a scoped context, and close must be called to release native resources. It
- * does implement a custom finalizer that will release these resources if needed, but
- * this should not be relied on.
- *
- * @author Kellen Sunderland
- */
-
-public class KenLMPool implements AutoCloseable {
-
- private final long pool;
- private final KenLM languageModel;
- private final ByteBuffer ngramBuffer;
- private boolean released = false;
-
- public KenLMPool(long pool, KenLM languageModel, ByteBuffer ngramBuffer) {
- this.pool = pool;
- this.languageModel = languageModel;
- this.ngramBuffer = ngramBuffer;
- }
-
- public long getPool() {
- return pool;
- }
-
- @Override
- protected void finalize() throws Throwable {
- close();
- super.finalize();
- }
-
- @Override
- public synchronized void close() {
- if (!released) {
- released = true;
- languageModel.destroyLMPool(pool);
- }
- }
-
- /**
- * Write a single id to the KenLM shared buffer.
- * Note: This method must be used in conjunction with setBufferLength.
- *
- * @param index index at which to write id.
- * @param id word id to write.
- */
- public void writeIdToBuffer(int index, long id) {
- this.ngramBuffer.putLong((index + 1) * LONG_SIZE_IN_BYTES, id);
- }
-
- /**
- * Manually set the length of the ngram array to be used when calling probRule or estimate on
- * KenLM.
- * Note: Must be used if you are calling writeIdToBuffer.
- *
- * @param length The size of the array of ngrams you would like to use with probRule or estimate.
- */
- public void setBufferLength(long length) {
- ngramBuffer.putLong(0, length);
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e9f4f5b1/src/main/java/org/apache/joshua/decoder/LanguageModelStateManager.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/LanguageModelStateManager.java b/src/main/java/org/apache/joshua/decoder/LanguageModelStateManager.java
index 6a3c4b3..82c3212 100644
--- a/src/main/java/org/apache/joshua/decoder/LanguageModelStateManager.java
+++ b/src/main/java/org/apache/joshua/decoder/LanguageModelStateManager.java
@@ -11,10 +11,10 @@ import java.util.UUID;
*/
public class LanguageModelStateManager {
- private Map<UUID, KenLMPool> languageModelPoolMapping = new HashMap<>();
+ private Map<UUID, LmPool> languageModelPoolMapping = new HashMap<>();
- public KenLMPool getStatePool(UUID languageModelId, KenLM languageModel) {
- KenLMPool statePool = languageModelPoolMapping.get(languageModelId);
+ public LmPool getStatePool(UUID languageModelId, KenLM languageModel) {
+ LmPool statePool = languageModelPoolMapping.get(languageModelId);
if (statePool == null) {
statePool = languageModel.createLMPool();
languageModelPoolMapping.put(languageModelId, statePool);
@@ -23,7 +23,7 @@ public class LanguageModelStateManager {
}
public void clearStatePool() {
- languageModelPoolMapping.values().forEach(KenLMPool::close);
+ languageModelPoolMapping.values().forEach(LmPool::close);
languageModelPoolMapping.clear();
}
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e9f4f5b1/src/main/java/org/apache/joshua/decoder/LmPool.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/LmPool.java b/src/main/java/org/apache/joshua/decoder/LmPool.java
new file mode 100644
index 0000000..73af0a6
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/LmPool.java
@@ -0,0 +1,70 @@
+package org.apache.joshua.decoder;
+
+import org.apache.joshua.decoder.ff.lm.KenLM;
+
+import java.nio.ByteBuffer;
+
+import static org.apache.joshua.util.Constants.LONG_SIZE_IN_BYTES;
+
+/**
+ * Wraps a KenLM pool of states together with the buffer used to pass word ids to KenLM.
+ * This class is not thread-safe. Use it in a scoped context (e.g. try-with-resources) and
+ * call {@link #close()} to release native resources. A finalizer also calls close() as a
+ * fallback, but this should not be relied on.
+ *
+ * @author Kellen Sunderland
+ */
+
+public abstract class LmPool implements AutoCloseable {
+
+ private final long pool; // pool handle handed back to KenLM.destroyLMPool on close
+ private final KenLM languageModel; // language model asked to destroy the pool on close
+ private final ByteBuffer ngramBuffer; // slot 0: length, slots 1..n: word ids
+ private boolean released = false; // set once the pool has been destroyed
+
+ public LmPool(long pool, KenLM languageModel, ByteBuffer ngramBuffer) {
+ this.pool = pool;
+ this.languageModel = languageModel;
+ this.ngramBuffer = ngramBuffer;
+ }
+
+ public long getPool() {
+ return pool;
+ }
+
+ @Override
+ protected void finalize() throws Throwable {
+ close(); // fallback release in case the owner never called close()
+ super.finalize();
+ }
+
+ @Override
+ public synchronized void close() {
+ if (!released) {
+ released = true; // flag first so that a repeated close() does nothing
+ languageModel.destroyLMPool(pool);
+ }
+ }
+
+ /**
+ * Writes a single word id into the KenLM shared buffer at slot {@code index + 1}; slot 0
+ * holds the length, so this method must be used in conjunction with setBufferLength.
+ *
+ * @param index index at which to write id (stored at buffer slot index + 1).
+ * @param id word id to write.
+ */
+ public void writeIdToBuffer(int index, long id) {
+ this.ngramBuffer.putLong((index + 1) * LONG_SIZE_IN_BYTES, id); // absolute put, byte offset
+ }
+
+ /**
+ * Manually sets the length of the ngram array to be used when calling probRule or estimate on
+ * KenLM. The length is stored as the first long of the shared buffer.
+ * Note: Must be used if you are calling writeIdToBuffer.
+ *
+ * @param length The size of the array of ngrams you would like to use with probRule or estimate.
+ */
+ public void setBufferLength(long length) {
+ ngramBuffer.putLong(0, length);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e9f4f5b1/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java b/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
index d138495..98bba83 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
@@ -18,8 +18,9 @@
*/
package org.apache.joshua.decoder.ff.lm;
+import javafx.scene.Parent;
import org.apache.joshua.corpus.Vocabulary;
-import org.apache.joshua.decoder.KenLMPool;
+import org.apache.joshua.decoder.LmPool;
import org.apache.joshua.decoder.ff.state_maintenance.KenLMState;
import org.apache.joshua.util.FormatUtils;
import org.slf4j.Logger;
@@ -108,11 +109,11 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
}
}
- public KenLMPool createLMPool() {
+ public LmPool createLMPool() {
ByteBuffer ngramBuffer = ByteBuffer.allocateDirect(MAX_TARGET_LENGTH * LONG_SIZE_IN_BYTES);
ngramBuffer.order(LITTLE_ENDIAN);
long pool = createPool(ngramBuffer);
- return new KenLMPool(pool, this, ngramBuffer);
+ return new KenLMPool(pool, ngramBuffer);
}
public void destroyLMPool(long pointer) {
@@ -164,7 +165,7 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
* @return the updated {@link org.apache.joshua.decoder.ff.lm.KenLM.StateProbPair} e.g.
* KenLM state and the LM probability incurred along this rule
*/
- public StateProbPair probRule(long[] words, KenLMPool poolWrapper) {
+ public StateProbPair probRule(long[] words, LmPool poolWrapper) {
poolWrapper.setBufferLength(words.length);
for (int i = 0; i < words.length; i++) {
@@ -271,4 +272,9 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
}
}
+ private class KenLMPool extends LmPool { // non-static inner class, so KenLM.this is available
+ protected KenLMPool(long pool, ByteBuffer ngramBuffer) {
+ super(pool, KenLM.this, ngramBuffer); // bind the pool to the KenLM instance that created it
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e9f4f5b1/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java b/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
index c3281d6..f15db05 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/StateMinimizingLanguageModel.java
@@ -25,7 +25,7 @@ import java.util.UUID;
import org.apache.joshua.corpus.Vocabulary;
import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.decoder.KenLMPool;
+import org.apache.joshua.decoder.LmPool;
import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.FeatureVector;
import org.apache.joshua.decoder.ff.lm.KenLM.StateProbPair;
@@ -115,7 +115,7 @@ public class StateMinimizingLanguageModel extends LanguageModelFF {
// map to ken lm ids
final long[] words = mapToKenLmIds(ruleWords, tailNodes, false);
- KenLMPool statePool = sentence.getStateManager().getStatePool(languageModelPoolId, (KenLM)
+ LmPool statePool = sentence.getStateManager().getStatePool(languageModelPoolId, (KenLM)
languageModel);
// Get the probability of applying the rule and the new state
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e9f4f5b1/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java b/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
index f84c41a..1d8712d 100644
--- a/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
+++ b/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
@@ -21,20 +21,16 @@ package org.apache.joshua.decoder.segment_file;
import static org.apache.joshua.util.FormatUtils.addSentenceMarkers;
import java.util.ArrayList;
-import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
-import java.util.Map;
import java.util.StringTokenizer;
-import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.joshua.corpus.Vocabulary;
import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.decoder.KenLMPool;
import org.apache.joshua.decoder.LanguageModelStateManager;
import org.apache.joshua.decoder.ff.tm.Grammar;
import org.apache.joshua.lattice.Arc;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e9f4f5b1/src/test/java/org/apache/joshua/system/KenLmTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/system/KenLmTest.java b/src/test/java/org/apache/joshua/system/KenLmTest.java
index 2d129f1..7c42485 100644
--- a/src/test/java/org/apache/joshua/system/KenLmTest.java
+++ b/src/test/java/org/apache/joshua/system/KenLmTest.java
@@ -19,7 +19,7 @@
package org.apache.joshua.system;
import org.apache.joshua.corpus.Vocabulary;
-import org.apache.joshua.decoder.KenLMPool;
+import org.apache.joshua.decoder.LmPool;
import org.apache.joshua.decoder.ff.lm.KenLM;
import org.apache.joshua.util.io.KenLmTestUtil;
import org.testng.annotations.AfterMethod;
@@ -99,7 +99,7 @@ public class KenLmTest {
// WHEN
KenLM.StateProbPair result;
- try (KenLMPool poolPointer = kenLm.createLMPool()) {
+ try (LmPool poolPointer = kenLm.createLMPool()) {
result = kenLm.probRule(longIds, poolPointer);
}