You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/04/02 00:56:26 UTC

[03/10] incubator-joshua git commit: Add an LRU cache from Google Guava to decrease allocations in the PackedGrammar getRules() call. Results in a 1.5 times speedup in decoding and a large decrease in required garbage collection.

Add an LRU cache from Google Guava to decrease allocations in the PackedGrammar getRules() call.
Results in a 1.5 times speedup in decoding and a large decrease in required garbage collection.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/e70677d2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/e70677d2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/e70677d2

Branch: refs/heads/master
Commit: e70677d2eab23daa7082173e6fe337d68aa12230
Parents: 0990ebc
Author: Kellen Sunderland <ke...@amazon.com>
Authored: Tue Sep 22 13:37:54 2015 +0200
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Thu Mar 31 10:44:42 2016 +0200

----------------------------------------------------------------------
 bin/joshua-decoder                              |  2 +-
 build.xml                                       |  1 +
 lib/ivy.xml                                     |  1 +
 pom.xml                                         |  5 +++++
 src/joshua/decoder/JoshuaConfiguration.java     |  7 +++++++
 .../decoder/ff/tm/packed/PackedGrammar.java     | 20 +++++++++++++++++++-
 6 files changed, 34 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e70677d2/bin/joshua-decoder
----------------------------------------------------------------------
diff --git a/bin/joshua-decoder b/bin/joshua-decoder
index 57c09f1..cdb2cf4 100755
--- a/bin/joshua-decoder
+++ b/bin/joshua-decoder
@@ -27,7 +27,7 @@ set -u
 JOSHUA=$(dirname $0)/..
 
 exec java -Xmx${mem} \
- 	-cp $JOSHUA/class:$JOSHUA/ext/berkeleylm/jar/berkeleylm.jar:$JOSHUA/lib/gson-2.5.jar \
+ 	-cp $JOSHUA/class:$JOSHUA/ext/berkeleylm/jar/berkeleylm.jar:$JOSHUA/lib/gson-2.5.jar:$JOSHUA/lib/guava-19.0.jar \
 	-Dfile.encoding=utf8 \
 	-Djava.util.logging.config.file=${JOSHUA}/logging.properties \
 	-Djava.library.path=$JOSHUA/lib \

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e70677d2/build.xml
----------------------------------------------------------------------
diff --git a/build.xml b/build.xml
index 6456721..7095ca2 100644
--- a/build.xml
+++ b/build.xml
@@ -28,6 +28,7 @@
       <include name="collections-generic-4.01.jar" />
       <include name="args4j-2.0.29.jar" />
       <include name="gson-2.5.jar" />
+      <include name="guava-19.0.jar" />
     </fileset>
     <fileset dir="${thraxlib}">
       <include name="thrax.jar" />

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e70677d2/lib/ivy.xml
----------------------------------------------------------------------
diff --git a/lib/ivy.xml b/lib/ivy.xml
index 02f3ff7..d41595d 100644
--- a/lib/ivy.xml
+++ b/lib/ivy.xml
@@ -12,5 +12,6 @@
     <dependency org="net.sourceforge.collections" name="collections-generic" rev="4.01"/>
     <dependency org="args4j" name="args4j" rev="2.0.29" />
     <dependency org="com.google.code.gson" name="gson" rev="2.5"/>
+    <dependency org="com.google.guava" name="guava" rev="19.0"/>
   </dependencies>
 </ivy-module>

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e70677d2/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 3b4aac1..de75e80 100644
--- a/pom.xml
+++ b/pom.xml
@@ -122,5 +122,10 @@
       <version>4.10</version>
       <optional>true</optional>
     </dependency>
+    <dependency>
+        <groupId>com.google.guava</groupId>
+        <artifactId>guava</artifactId>
+        <version>19.0</version>
+    </dependency>
   </dependencies>
 </project>

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e70677d2/src/joshua/decoder/JoshuaConfiguration.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/JoshuaConfiguration.java b/src/joshua/decoder/JoshuaConfiguration.java
index ece18d2..49ab87d 100644
--- a/src/joshua/decoder/JoshuaConfiguration.java
+++ b/src/joshua/decoder/JoshuaConfiguration.java
@@ -33,6 +33,10 @@ public class JoshuaConfiguration {
   // List of grammar files to read
   public ArrayList<String> tms = new ArrayList<String>();
 
+  // A rule cache for commonly used tries to avoid excess object allocations
+  // Testing shows there's up to ~95% hit rate when cache size is 5000 Trie nodes.
+  public Integer cachedRuleSize = new Integer(5000);
+
   /*
    * The file to read the weights from (part of the sparse features implementation). Weights can
    * also just be listed in the main config file.
@@ -609,6 +613,9 @@ public class JoshuaConfiguration {
             // Check source sentence
             source_annotations = true;
 
+          } else if (parameter.equals(normalize_key("cached-rules-size"))) {
+              // Maximum number of Trie nodes kept in the PackedGrammar rule cache
+              cachedRuleSize = Integer.parseInt(fds[1]);
           } else {
 
             if (parameter.equals(normalize_key("use-sent-specific-tm"))

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e70677d2/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/packed/PackedGrammar.java b/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
index df5538a..dc72a4b 100644
--- a/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
+++ b/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
@@ -79,6 +79,9 @@ import joshua.util.encoding.EncoderConfiguration;
 import joshua.util.encoding.FloatEncoder;
 import joshua.util.io.LineReader;
 
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+
 public class PackedGrammar extends AbstractGrammar {
 
   private EncoderConfiguration encoding;
@@ -92,6 +95,10 @@ public class PackedGrammar extends AbstractGrammar {
   // The grammar specification keyword (e.g., "thrax" or "moses")
   private String type;
 
+  // A rule cache for commonly used tries to avoid excess object allocations
+  // Testing shows there's up to ~95% hit rate when cache size is 5000 Trie nodes.
+  private final Cache<Trie, List<Rule>> cached_rules;
+
   public PackedGrammar(String grammar_dir, int span_limit, String owner, String type,
       JoshuaConfiguration joshuaConfiguration) throws FileNotFoundException, IOException {
     super(joshuaConfiguration);
@@ -132,6 +139,7 @@ public class PackedGrammar extends AbstractGrammar {
     for (PackedSlice s : slices)
       count += s.estimated.length;
     root = new PackedRoot(slices);
+    cached_rules = CacheBuilder.newBuilder().maximumSize(joshuaConfiguration.cachedRuleSize).build();
 
     Decoder.LOG(1, String.format("Loaded %d rules", count));
   }
@@ -618,17 +626,24 @@ public class PackedGrammar extends AbstractGrammar {
 
       @Override
       public List<Rule> getRules() {
+        List<Rule> rules = cached_rules.getIfPresent(this);
+        if (rules != null) {
+          return rules;
+        }
+
         int num_children = source[position];
         int rule_position = position + 2 * (num_children + 1);
         int num_rules = source[rule_position - 1];
 
-        ArrayList<Rule> rules = new ArrayList<Rule>(num_rules);
+        rules = new ArrayList<Rule>(num_rules);
         for (int i = 0; i < num_rules; i++) {
           if (type.equals("moses") || type.equals("phrase"))
             rules.add(new PackedPhrasePair(rule_position + 3 * i));
           else
             rules.add(new PackedRule(rule_position + 3 * i));
         }
+
+        cached_rules.put(this, rules);
         return rules;
       }
 
@@ -684,6 +699,9 @@ public class PackedGrammar extends AbstractGrammar {
         }
         for (int i = 0; i < sorted.length; i++)
           source[rule_position + i] = sorted[i];
+
+        // Replace rules in cache with their sorted values on next getRules()
+        cached_rules.invalidate(this);
         this.sorted = true;
       }