You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2013/09/05 12:08:55 UTC

svn commit: r1520268 - in /lucene/dev/trunk/lucene: ./ core/src/java/org/apache/lucene/codecs/compressing/ core/src/test/org/apache/lucene/codecs/compressing/

Author: jpountz
Date: Thu Sep  5 10:08:55 2013
New Revision: 1520268

URL: http://svn.apache.org/r1520268
Log:
LUCENE-5201: Fixed compression bug in LZ4.compressHC.

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingTermVectorsFormat.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestFastDecompressionMode.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1520268&r1=1520267&r2=1520268&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Sep  5 10:08:55 2013
@@ -156,6 +156,10 @@ Bug Fixes
   outside BMP because it encoded UTF-16 chars instead of codepoints.
   The escaping of codepoints > 127 was removed (not needed for valid HTML)
   and missing escaping for ' and / was added.  (Uwe Schindler)
+
+* LUCENE-5201: Fixed compression bug in LZ4.compressHC when the input is highly
+  compressible and the start offset of the array to compress is > 0.
+  (Adrien Grand)
   
 API Changes
 

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java?rev=1520268&r1=1520267&r2=1520268&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java Thu Sep  5 10:08:55 2013
@@ -295,7 +295,7 @@ final class LZ4 {
     private int hashPointer(byte[] bytes, int off) {
       final int v = readInt(bytes, off);
       final int h = hashHC(v);
-      return base + hashTable[h];
+      return hashTable[h];
     }
 
     private int next(int off) {
@@ -306,6 +306,7 @@ final class LZ4 {
       final int v = readInt(bytes, off);
       final int h = hashHC(v);
       int delta = off - hashTable[h];
+      assert delta > 0 : delta;
       if (delta >= MAX_DISTANCE) {
         delta = MAX_DISTANCE - 1;
       }

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java?rev=1520268&r1=1520267&r2=1520268&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java Thu Sep  5 10:08:55 2013
@@ -50,15 +50,15 @@ public abstract class AbstractTestCompre
     return arr;
   }
 
-  byte[] compress(byte[] decompressed) throws IOException {
+  byte[] compress(byte[] decompressed, int off, int len) throws IOException {
     Compressor compressor = mode.newCompressor();
-    return compress(compressor, decompressed);
+    return compress(compressor, decompressed, off, len);
   }
 
-  static byte[] compress(Compressor compressor, byte[] decompressed) throws IOException {
-    byte[] compressed = new byte[decompressed.length * 2 + 16]; // should be enough
+  static byte[] compress(Compressor compressor, byte[] decompressed, int off, int len) throws IOException {
+    byte[] compressed = new byte[len * 2 + 16]; // should be enough
     ByteArrayDataOutput out = new ByteArrayDataOutput(compressed);
-    compressor.compress(decompressed, 0, decompressed.length, out);
+    compressor.compress(decompressed, off, len, out);
     final int compressedLen = out.getPosition();
     return Arrays.copyOf(compressed, compressedLen);
   }
@@ -85,9 +85,11 @@ public abstract class AbstractTestCompre
     final int iterations = atLeast(10);
     for (int i = 0; i < iterations; ++i) {
       final byte[] decompressed = randomArray();
-      final byte[] compressed = compress(decompressed);
-      final byte[] restored = decompress(compressed, decompressed.length);
-      assertArrayEquals(decompressed, restored);
+      final int off = random().nextBoolean() ? 0 : _TestUtil.nextInt(random(), 0, decompressed.length);
+      final int len = random().nextBoolean() ? decompressed.length - off : _TestUtil.nextInt(random(), 0, decompressed.length - off);
+      final byte[] compressed = compress(decompressed, off, len);
+      final byte[] restored = decompress(compressed, len);
+      assertArrayEquals(Arrays.copyOfRange(decompressed, off, off+len), restored);
     }
   }
 
@@ -95,7 +97,7 @@ public abstract class AbstractTestCompre
     final int iterations = atLeast(10);
     for (int i = 0; i < iterations; ++i) {
       final byte[] decompressed = randomArray();
-      final byte[] compressed = compress(decompressed);
+      final byte[] compressed = compress(decompressed, 0, decompressed.length);
       final int offset, length;
       if (decompressed.length == 0) {
         offset = length = 0;
@@ -109,9 +111,13 @@ public abstract class AbstractTestCompre
   }
 
   public byte[] test(byte[] decompressed) throws IOException {
-    final byte[] compressed = compress(decompressed);
-    final byte[] restored = decompress(compressed, decompressed.length);
-    assertEquals(decompressed.length, restored.length);
+    return test(decompressed, 0, decompressed.length);
+  }
+
+  public byte[] test(byte[] decompressed, int off, int len) throws IOException {
+    final byte[] compressed = compress(decompressed, off, len);
+    final byte[] restored = decompress(compressed, len);
+    assertEquals(len, restored.length);
     return compressed;
   }
 
@@ -137,4 +143,58 @@ public abstract class AbstractTestCompre
     test(decompressed);
   }
 
+  public void testLUCENE5201() throws IOException {
+    byte[] data = new byte[]{
+        14, 72, 14, 85, 3, 72, 14, 85, 3, 72, 14, 72, 14, 72, 14, 85, 3, 72, 14, 72, 14, 72, 14, 72, 14, 72, 14, 72, 14, 85, 3, 72,
+        14, 85, 3, 72, 14, 85, 3, 72, 14, 85, 3, 72, 14, 85, 3, 72, 14, 85, 3, 72, 14, 50, 64, 0, 46, -1, 0, 0, 0, 29, 3, 85,
+        8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3,
+        0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113,
+        0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113,
+        0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 50, 64, 0, 47, -105, 0, 0, 0, 30, 3, -97, 6, 0, 68, -113,
+        0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85,
+        8, -113, 0, 68, -97, 3, 0, 2, -97, 6, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97,
+        6, 0, 68, -113, 0, 120, 64, 0, 48, 4, 0, 0, 0, 31, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72,
+        33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72,
+        43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72,
+        28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72,
+        35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72,
+        41, 72, 32, 72, 18, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 39, 24, 32, 34, 124, 0, 120, 64, 0, 48, 80, 0, 0, 0, 31, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72,
+        35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72,
+        41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72,
+        40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72,
+        31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72,
+        26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72,
+        37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72,
+        36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72,
+        20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72,
+        22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72,
+        38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72,
+        29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72,
+        27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 50, 64, 0, 49, 20, 0, 0, 0, 32, 3, -97, 6, 0,
+        68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97,
+        6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2,
+        3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2,
+        3, -97, 6, 0, 50, 64, 0, 50, 53, 0, 0, 0, 34, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -113, 0, 2, 3, -97,
+        6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3,
+        -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97,
+        3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3,
+        85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0,
+        2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3,
+        -97, 6, 0, 50, 64, 0, 51, 85, 0, 0, 0, 36, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97,
+        6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, -97, 5, 0, 2, 3, 85, 8, -113, 0, 68,
+        -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0,
+        68, -113, 0, 2, 3, -97, 6, 0, 50, -64, 0, 51, -45, 0, 0, 0, 37, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6,
+        0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -113, 0, 2, 3, -97,
+        6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 120, 64, 0, 52, -88, 0, 0,
+        0, 39, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72,
+        13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85,
+        5, 72, 13, 85, 5, 72, 13, 72, 13, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85,
+        5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85,
+        5, 72, 13, 85, 5, 72, 13, 72, 13, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 72, 13, 85, 5, 72, 13, 72,
+        13, 85, 5, 72, 13, 72, 13, 85, 5, 72, 13, -19, -24, -101, -35
+      };
+    test(data, 9, data.length - 9);
+  }
+
 }
\ No newline at end of file

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java?rev=1520268&r1=1520267&r2=1520268&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java Thu Sep  5 10:08:55 2013
@@ -32,8 +32,10 @@ import org.apache.lucene.index.RandomInd
 import org.apache.lucene.store.Directory;
 import org.junit.Test;
 
+import com.carrotsearch.randomizedtesting.annotations.Repeat;
 import com.carrotsearch.randomizedtesting.generators.RandomInts;
 
+@Repeat(iterations=5) // give it a chance to test various compression modes with different chunk sizes
 public class TestCompressingStoredFieldsFormat extends BaseStoredFieldsFormatTestCase {
 
   @Override

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingTermVectorsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingTermVectorsFormat.java?rev=1520268&r1=1520267&r2=1520268&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingTermVectorsFormat.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingTermVectorsFormat.java Thu Sep  5 10:08:55 2013
@@ -14,6 +14,8 @@ import org.apache.lucene.index.TermsEnum
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 
+import com.carrotsearch.randomizedtesting.annotations.Repeat;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -31,6 +33,7 @@ import org.apache.lucene.util.BytesRef;
  * limitations under the License.
  */
 
+@Repeat(iterations=5) // give it a chance to test various compression modes with different chunk sizes
 public class TestCompressingTermVectorsFormat extends BaseTermVectorsFormatTestCase {
 
   @Override

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestFastDecompressionMode.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestFastDecompressionMode.java?rev=1520268&r1=1520267&r2=1520268&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestFastDecompressionMode.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestFastDecompressionMode.java Thu Sep  5 10:08:55 2013
@@ -28,9 +28,9 @@ public class TestFastDecompressionMode e
   }
 
   @Override
-  public byte[] test(byte[] decompressed) throws IOException {
-    final byte[] compressed = super.test(decompressed);
-    final byte[] compressed2 = compress(CompressionMode.FAST.newCompressor(), decompressed);
+  public byte[] test(byte[] decompressed, int off, int len) throws IOException {
+    final byte[] compressed = super.test(decompressed, off, len);
+    final byte[] compressed2 = compress(CompressionMode.FAST.newCompressor(), decompressed, off, len);
     // because of the way this compression mode works, its output is necessarily
     // smaller than the output of CompressionMode.FAST
     assertTrue(compressed.length <= compressed2.length);