You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2013/01/25 14:54:47 UTC

svn commit: r1438519 - in /lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing: CompressionMode.java LZ4.java

Author: jpountz
Date: Fri Jan 25 13:54:46 2013
New Revision: 1438519

URL: http://svn.apache.org/viewvc?rev=1438519&view=rev
Log:
LUCENE-4708: Reuse LZ4 hash tables across calls.

Modified:
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java?rev=1438519&r1=1438518&r2=1438519&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java Fri Jan 25 13:54:46 2013
@@ -45,7 +45,7 @@ public abstract class CompressionMode {
 
     @Override
     public Compressor newCompressor() {
-      return LZ4_FAST_COMPRESSOR;
+      return new LZ4FastCompressor();
     }
 
     @Override
@@ -95,7 +95,7 @@ public abstract class CompressionMode {
 
     @Override
     public Compressor newCompressor() {
-      return LZ4_HIGH_COMPRESSOR;
+      return new LZ4HighCompressor();
     }
 
     @Override
@@ -147,25 +147,37 @@ public abstract class CompressionMode {
 
   };
 
-  private static final Compressor LZ4_FAST_COMPRESSOR = new Compressor() {
+  private static final class LZ4FastCompressor extends Compressor {
+
+    private final LZ4.HashTable ht;
+
+    LZ4FastCompressor() {
+      ht = new LZ4.HashTable();
+    }
 
     @Override
     public void compress(byte[] bytes, int off, int len, DataOutput out)
         throws IOException {
-      LZ4.compress(bytes, off, len, out);
+      LZ4.compress(bytes, off, len, out, ht);
     }
 
-  };
+  }
 
-  private static final Compressor LZ4_HIGH_COMPRESSOR = new Compressor() {
+  private static final class LZ4HighCompressor extends Compressor {
+
+    private final LZ4.HCHashTable ht;
+
+    LZ4HighCompressor() {
+      ht = new LZ4.HCHashTable();
+    }
 
     @Override
     public void compress(byte[] bytes, int off, int len, DataOutput out)
         throws IOException {
-      LZ4.compressHC(bytes, off, len, out);
+      LZ4.compressHC(bytes, off, len, out, ht);
     }
 
-  };
+  }
 
   private static final class DeflateDecompressor extends Decompressor {
 

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java?rev=1438519&r1=1438518&r2=1438519&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java Fri Jan 25 13:54:46 2013
@@ -30,7 +30,7 @@ import org.apache.lucene.util.packed.Pac
  * http://code.google.com/p/lz4/
  * http://fastcompression.blogspot.fr/p/lz4.html
  */
-class LZ4 {
+final class LZ4 {
 
   private LZ4() {}
 
@@ -181,11 +181,29 @@ class LZ4 {
     }
   }
 
+  static final class HashTable {
+    private int hashLog;
+    private PackedInts.Mutable hashTable;
+
+    void reset(int len) {
+      final int bitsPerOffset = PackedInts.bitsRequired(len - LAST_LITERALS);
+      final int bitsPerOffsetLog = 32 - Integer.numberOfLeadingZeros(bitsPerOffset - 1);
+      hashLog = MEMORY_USAGE + 3 - bitsPerOffsetLog;
+      if (hashTable == null || hashTable.size() < 1 << hashLog || hashTable.getBitsPerValue() < bitsPerOffset) {
+        hashTable = PackedInts.getMutable(1 << hashLog, bitsPerOffset, PackedInts.DEFAULT);
+      } else {
+        hashTable.clear();
+      }
+    }
+
+  }
+
   /**
    * Compress <code>bytes[off:off+len]</code> into <code>out</code> using
-   * at most 16KB of memory.
+   * at most 16KB of memory. <code>ht</code> shouldn't be shared across threads
+   * but can safely be reused.
    */
-  public static void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException {
+  public static void compress(byte[] bytes, int off, int len, DataOutput out, HashTable ht) throws IOException {
 
     final int base = off;
     final int end = off + len;
@@ -196,11 +214,9 @@ class LZ4 {
 
       final int limit = end - LAST_LITERALS;
       final int matchLimit = limit - MIN_MATCH;
-
-      final int bitsPerOffset = PackedInts.bitsRequired(len - LAST_LITERALS);
-      final int bitsPerOffsetLog = 32 - Integer.numberOfLeadingZeros(bitsPerOffset - 1);
-      final int hashLog = MEMORY_USAGE + 3 - bitsPerOffsetLog;
-      final PackedInts.Mutable hashTable = PackedInts.getMutable(1 << hashLog, bitsPerOffset, PackedInts.DEFAULT);
+      ht.reset(len);
+      final int hashLog = ht.hashLog;
+      final PackedInts.Mutable hashTable = ht.hashTable;
 
       main:
       while (off < limit) {
@@ -256,20 +272,24 @@ class LZ4 {
     m2.ref = m1.ref;
   }
 
-  private static class HashTable {
+  static final class HCHashTable {
     static final int MAX_ATTEMPTS = 256;
     static final int MASK = MAX_DISTANCE - 1;
     int nextToUpdate;
-    private final int base;
+    private int base;
     private final int[] hashTable;
     private final short[] chainTable;
 
-    HashTable(int base) {
+    HCHashTable() {
+      hashTable = new int[HASH_TABLE_SIZE_HC];
+      chainTable = new short[MAX_DISTANCE];
+    }
+
+    private void reset(int base) {
       this.base = base;
       nextToUpdate = base;
-      hashTable = new int[HASH_TABLE_SIZE_HC];
       Arrays.fill(hashTable, -1);
-      chainTable = new short[MAX_DISTANCE];
+      Arrays.fill(chainTable, (short) 0);
     }
 
     private int hashPointer(byte[] bytes, int off) {
@@ -355,12 +375,14 @@ class LZ4 {
 
   /**
    * Compress <code>bytes[off:off+len]</code> into <code>out</code>. Compared to
-   * {@link LZ4#compress(byte[], int, int, DataOutput)}, this method is slower,
-   * uses more memory (~ 256KB), but should provide better compression ratios
-   * (especially on large inputs) because it chooses the best match among up to
-   * 256 candidates and then performs trade-offs to fix overlapping matches.
+   * {@link LZ4#compress(byte[], int, int, DataOutput, HashTable)}, this method
+   * is slower and uses more memory (~ 256KB per thread) but should provide
+   * better compression ratios (especially on large inputs) because it chooses
+   * the best match among up to 256 candidates and then performs trade-offs to
+   * fix overlapping matches. <code>ht</code> shouldn't be shared across threads
+   * but can safely be reused.
    */
-  public static void compressHC(byte[] src, int srcOff, int srcLen, DataOutput out) throws IOException {
+  public static void compressHC(byte[] src, int srcOff, int srcLen, DataOutput out, HCHashTable ht) throws IOException {
 
     final int srcEnd = srcOff + srcLen;
     final int matchLimit = srcEnd - LAST_LITERALS;
@@ -368,7 +390,7 @@ class LZ4 {
     int sOff = srcOff;
     int anchor = sOff++;
 
-    final HashTable ht = new HashTable(srcOff);
+    ht.reset(srcOff);
     final Match match0 = new Match();
     final Match match1 = new Match();
     final Match match2 = new Match();