You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/04/02 00:56:26 UTC
[03/10] incubator-joshua git commit: Add an LRU cache from Google
Guava to decrease allocations in the PackedGrammar getRules() call. Results in
a 1.5 times speedup in decoding and a large decrease in required garbage
collection.
Add an LRU cache from Google Guava to decrease allocations in the PackedGrammar getRules() call.
Results in a 1.5 times speedup in decoding and a large decrease in required garbage collection.
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/e70677d2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/e70677d2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/e70677d2
Branch: refs/heads/master
Commit: e70677d2eab23daa7082173e6fe337d68aa12230
Parents: 0990ebc
Author: Kellen Sunderland <ke...@amazon.com>
Authored: Tue Sep 22 13:37:54 2015 +0200
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Thu Mar 31 10:44:42 2016 +0200
----------------------------------------------------------------------
bin/joshua-decoder | 2 +-
build.xml | 1 +
lib/ivy.xml | 1 +
pom.xml | 5 +++++
src/joshua/decoder/JoshuaConfiguration.java | 7 +++++++
.../decoder/ff/tm/packed/PackedGrammar.java | 20 +++++++++++++++++++-
6 files changed, 34 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e70677d2/bin/joshua-decoder
----------------------------------------------------------------------
diff --git a/bin/joshua-decoder b/bin/joshua-decoder
index 57c09f1..cdb2cf4 100755
--- a/bin/joshua-decoder
+++ b/bin/joshua-decoder
@@ -27,7 +27,7 @@ set -u
JOSHUA=$(dirname $0)/..
exec java -Xmx${mem} \
- -cp $JOSHUA/class:$JOSHUA/ext/berkeleylm/jar/berkeleylm.jar:$JOSHUA/lib/gson-2.5.jar \
+ -cp $JOSHUA/class:$JOSHUA/ext/berkeleylm/jar/berkeleylm.jar:$JOSHUA/lib/gson-2.5.jar:$JOSHUA/lib/guava-19.0.jar \
-Dfile.encoding=utf8 \
-Djava.util.logging.config.file=${JOSHUA}/logging.properties \
-Djava.library.path=$JOSHUA/lib \
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e70677d2/build.xml
----------------------------------------------------------------------
diff --git a/build.xml b/build.xml
index 6456721..7095ca2 100644
--- a/build.xml
+++ b/build.xml
@@ -28,6 +28,7 @@
<include name="collections-generic-4.01.jar" />
<include name="args4j-2.0.29.jar" />
<include name="gson-2.5.jar" />
+ <include name="guava-19.0.jar" />
</fileset>
<fileset dir="${thraxlib}">
<include name="thrax.jar" />
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e70677d2/lib/ivy.xml
----------------------------------------------------------------------
diff --git a/lib/ivy.xml b/lib/ivy.xml
index 02f3ff7..d41595d 100644
--- a/lib/ivy.xml
+++ b/lib/ivy.xml
@@ -12,5 +12,6 @@
<dependency org="net.sourceforge.collections" name="collections-generic" rev="4.01"/>
<dependency org="args4j" name="args4j" rev="2.0.29" />
<dependency org="com.google.code.gson" name="gson" rev="2.5"/>
+ <dependency org="com.google.guava" name="guava" rev="19.0"/>
</dependencies>
</ivy-module>
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e70677d2/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 3b4aac1..de75e80 100644
--- a/pom.xml
+++ b/pom.xml
@@ -122,5 +122,10 @@
<version>4.10</version>
<optional>true</optional>
</dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <version>19.0</version>
+ </dependency>
</dependencies>
</project>
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e70677d2/src/joshua/decoder/JoshuaConfiguration.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/JoshuaConfiguration.java b/src/joshua/decoder/JoshuaConfiguration.java
index ece18d2..49ab87d 100644
--- a/src/joshua/decoder/JoshuaConfiguration.java
+++ b/src/joshua/decoder/JoshuaConfiguration.java
@@ -33,6 +33,10 @@ public class JoshuaConfiguration {
// List of grammar files to read
public ArrayList<String> tms = new ArrayList<String>();
+ // A rule cache for commonly used tries to avoid excess object allocations
+ // Testing shows there's up to ~95% hit rate when cache size is 5000 Trie nodes.
+ public Integer cachedRuleSize = new Integer(5000);
+
/*
* The file to read the weights from (part of the sparse features implementation). Weights can
* also just be listed in the main config file.
@@ -609,6 +613,9 @@ public class JoshuaConfiguration {
// Check source sentence
source_annotations = true;
+ } else if (parameter.equals(normalize_key("cached-rules-size"))) {
+ // Maximum number of Trie nodes whose rule lists are kept in the rule cache
+ cachedRuleSize = Integer.parseInt(fds[1]);
} else {
if (parameter.equals(normalize_key("use-sent-specific-tm"))
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e70677d2/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/packed/PackedGrammar.java b/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
index df5538a..dc72a4b 100644
--- a/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
+++ b/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
@@ -79,6 +79,9 @@ import joshua.util.encoding.EncoderConfiguration;
import joshua.util.encoding.FloatEncoder;
import joshua.util.io.LineReader;
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+
public class PackedGrammar extends AbstractGrammar {
private EncoderConfiguration encoding;
@@ -92,6 +95,10 @@ public class PackedGrammar extends AbstractGrammar {
// The grammar specification keyword (e.g., "thrax" or "moses")
private String type;
+ // A rule cache for commonly used tries to avoid excess object allocations
+ // Testing shows there's up to ~95% hit rate when cache size is 5000 Trie nodes.
+ private final Cache<Trie, List<Rule>> cached_rules;
+
public PackedGrammar(String grammar_dir, int span_limit, String owner, String type,
JoshuaConfiguration joshuaConfiguration) throws FileNotFoundException, IOException {
super(joshuaConfiguration);
@@ -132,6 +139,7 @@ public class PackedGrammar extends AbstractGrammar {
for (PackedSlice s : slices)
count += s.estimated.length;
root = new PackedRoot(slices);
+ cached_rules = CacheBuilder.newBuilder().maximumSize(joshuaConfiguration.cachedRuleSize).build();
Decoder.LOG(1, String.format("Loaded %d rules", count));
}
@@ -618,17 +626,24 @@ public class PackedGrammar extends AbstractGrammar {
@Override
public List<Rule> getRules() {
+ List<Rule> rules = cached_rules.getIfPresent(this);
+ if (rules != null) {
+ return rules;
+ }
+
int num_children = source[position];
int rule_position = position + 2 * (num_children + 1);
int num_rules = source[rule_position - 1];
- ArrayList<Rule> rules = new ArrayList<Rule>(num_rules);
+ rules = new ArrayList<Rule>(num_rules);
for (int i = 0; i < num_rules; i++) {
if (type.equals("moses") || type.equals("phrase"))
rules.add(new PackedPhrasePair(rule_position + 3 * i));
else
rules.add(new PackedRule(rule_position + 3 * i));
}
+
+ cached_rules.put(this, rules);
return rules;
}
@@ -684,6 +699,9 @@ public class PackedGrammar extends AbstractGrammar {
}
for (int i = 0; i < sorted.length; i++)
source[rule_position + i] = sorted[i];
+
+ // Replace rules in cache with their sorted values on next getRules()
+ cached_rules.invalidate(this);
this.sorted = true;
}