You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/05/03 05:07:39 UTC
[2/6] incubator-joshua git commit: Optimized allocations with
sub-array indexes
Optimized allocations with sub-array indexes
------------
Statistics below are taken from the last 10 minutes of the recordings.
Statistics:
Before
Total TLAB: 1,391.77 GB
Allocation rate: 2.32 GB/s
After
Total TLAB: 1,320.95 GB
Allocation rate: 2.20 GB/s
------------
Results
-0.12 GB/s allocation rate (2.32 GB/s -> 2.20 GB/s)
java.util.Arrays.copyOfRange: -70 GB of allocations from joshua.decoder.ff.lm.berkeley_lm.LMGrammarBerkeley.sentenceLogProbability
10K translation performance testing
Before: avg 5.29
After: avg 5.24
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/39f59a8d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/39f59a8d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/39f59a8d
Branch: refs/heads/master
Commit: 39f59a8d7950f362cc52b2414dbd53efc130e404
Parents: 7fd3cfc
Author: Pavel Danchenko <da...@amazon.com>
Authored: Wed Feb 10 17:12:15 2016 +0100
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Mon May 2 12:23:39 2016 -0700
----------------------------------------------------------------------
.../ff/lm/berkeley_lm/LMGrammarBerkeley.java | 49 +++++++++++++-------
.../LMBerkeleySentenceProbablityTest.java | 29 ++++++++++++
2 files changed, 62 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/39f59a8d/src/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeley.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeley.java b/src/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeley.java
index d5728cf..2716576 100644
--- a/src/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeley.java
+++ b/src/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeley.java
@@ -24,6 +24,8 @@ import java.util.logging.Handler;
import java.util.logging.Level;
import java.util.logging.Logger;
+import com.google.common.annotations.VisibleForTesting;
+
import joshua.corpus.Vocabulary;
import joshua.decoder.ff.lm.DefaultNGramLanguageModel;
import joshua.decoder.Decoder;
@@ -37,7 +39,7 @@ import edu.berkeley.nlp.lm.util.StrUtils;
/**
* This class wraps Berkeley LM.
- *
+ *
* @author adpauls@gmail.com
*/
public class LMGrammarBerkeley extends DefaultNGramLanguageModel {
@@ -120,9 +122,9 @@ public class LMGrammarBerkeley extends DefaultNGramLanguageModel {
for (int j = startIndex; j < order && j <= sentenceLength; j++) {
// TODO: startIndex dependens on the order, e.g., this.ngramOrder-1 (in srilm, for 3-gram lm,
// start_index=2. othercase, need to check)
- int[] ngram = Arrays.copyOfRange(sentence, 0, j);
- double logProb = ngramLogProbability_helper(ngram, false);
+ double logProb = ngramLogProbability_helper(sentence, 0, j, false);
if (logger.isLoggable(Level.FINE)) {
+ int[] ngram = Arrays.copyOfRange(sentence, 0, j);
String words = Vocabulary.getWords(ngram);
logger.fine("\tlogp ( " + words + " ) = " + logProb);
}
@@ -131,9 +133,9 @@ public class LMGrammarBerkeley extends DefaultNGramLanguageModel {
// regular-order ngrams
for (int i = 0; i <= sentenceLength - order; i++) {
- int[] ngram = Arrays.copyOfRange(sentence, i, i + order);
- double logProb = ngramLogProbability_helper(ngram, false);
+ double logProb = ngramLogProbability_helper(sentence, i, order, false);
if (logger.isLoggable(Level.FINE)) {
+ int[] ngram = Arrays.copyOfRange(sentence, i, i + order);
String words = Vocabulary.getWords(ngram);
logger.fine("\tlogp ( " + words + " ) = " + logProb);
}
@@ -147,26 +149,26 @@ public class LMGrammarBerkeley extends DefaultNGramLanguageModel {
public float ngramLogProbability_helper(int[] ngram, int order) {
return ngramLogProbability_helper(ngram, false);
}
-
+
protected float ngramLogProbability_helper(int[] ngram, boolean log) {
+ return ngramLogProbability_helper(ngram, 0, ngram.length, log);
+ }
+ protected float ngramLogProbability_helper(int sentence[], int ngramStartPos, int ngramLength, boolean log) {
int[] mappedNgram = arrayScratch.get();
- if (mappedNgram.length < ngram.length) {
- arrayScratch.set(mappedNgram = new int[mappedNgram.length * 2]);
+ if (mappedNgram.length < ngramLength) {
+ mappedNgram = new int[mappedNgram.length * 2];
+ arrayScratch.set(mappedNgram);
}
- for (int i = 0; i < ngram.length; ++i) {
- mappedNgram[i] = vocabIdToMyIdMapping[ngram[i]];
+ for (int i = 0; i < ngramLength; ++i) {
+ mappedNgram[i] = vocabIdToMyIdMapping[sentence[ngramStartPos + i]];
}
if (log && logRequests) {
- final int[] copyOf = Arrays.copyOf(mappedNgram, ngram.length);
- for (int i = 0; i < copyOf.length; ++i)
- if (copyOf[i] < 0) copyOf[i] = unkIndex;
- logger.finest(StrUtils.join(WordIndexer.StaticMethods.toList(lm.getWordIndexer(), copyOf)));
+ dumpBuffer(mappedNgram, ngramLength);
}
- final float res = lm.getLogProb(mappedNgram, 0, ngram.length);
- return res;
+ return lm.getLogProb(mappedNgram, 0, ngramLength);
}
public static void setLogRequests(Handler handler) {
@@ -183,4 +185,19 @@ public class LMGrammarBerkeley extends DefaultNGramLanguageModel {
public float ngramLogProbability(int[] ngram, int order) {
return ngramLogProbability(ngram);
}
+
+ private void dumpBuffer(int[] buffer, int len) {
+ final int[] copyOf = Arrays.copyOf(buffer, len);
+ for (int i = 0; i < copyOf.length; ++i) {
+ if (copyOf[i] < 0) {
+ copyOf[i] = unkIndex;
+ }
+ }
+ logger.finest(StrUtils.join(WordIndexer.StaticMethods.toList(lm.getWordIndexer(), copyOf)));
+ }
+
+ @VisibleForTesting
+ ArrayEncodedNgramLanguageModel<String> getLM() {
+ return lm;
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/39f59a8d/tst/joshua/decoder/ff/lm/berkeley_lm/LMBerkeleySentenceProbablityTest.java
----------------------------------------------------------------------
diff --git a/tst/joshua/decoder/ff/lm/berkeley_lm/LMBerkeleySentenceProbablityTest.java b/tst/joshua/decoder/ff/lm/berkeley_lm/LMBerkeleySentenceProbablityTest.java
new file mode 100644
index 0000000..74a832e
--- /dev/null
+++ b/tst/joshua/decoder/ff/lm/berkeley_lm/LMBerkeleySentenceProbablityTest.java
@@ -0,0 +1,29 @@
+package joshua.decoder.ff.lm.berkeley_lm;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Test;
+
+import edu.berkeley.nlp.lm.ArrayEncodedNgramLanguageModel;
+
+public class LMBerkeleySentenceProbablityTest {
+
+ @Test
+ public void verifySentenceLogProbability() {
+ LMGrammarBerkeley grammar = new LMGrammarBerkeley(2, "resources/berkeley_lm/lm");
+ grammar.registerWord("the", 2);
+ grammar.registerWord("chat-rooms", 3);
+ grammar.registerWord("<unk>", 0);
+
+ ArrayEncodedNgramLanguageModel<String> lm = grammar.getLM();
+ float expected =
+ lm.getLogProb(new int[] {}, 0, 0)
+ + lm.getLogProb(new int[] {0}, 0, 1)
+ + lm.getLogProb(new int[] {0, 2}, 0, 2)
+ + lm.getLogProb(new int[] {2, 3}, 0, 2)
+ + lm.getLogProb(new int[] {3, 0}, 0, 2);
+
+ float result = grammar.sentenceLogProbability(new int[] {0, 2, 3, 0}, 2, 0);
+ assertEquals(expected, result, 0.0);
+ }
+}