You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2013/09/05 12:08:55 UTC
svn commit: r1520268 - in /lucene/dev/trunk/lucene: ./
core/src/java/org/apache/lucene/codecs/compressing/
core/src/test/org/apache/lucene/codecs/compressing/
Author: jpountz
Date: Thu Sep 5 10:08:55 2013
New Revision: 1520268
URL: http://svn.apache.org/r1520268
Log:
LUCENE-5201: Fixed compression bug in LZ4.compressHC.
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingTermVectorsFormat.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestFastDecompressionMode.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1520268&r1=1520267&r2=1520268&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Sep 5 10:08:55 2013
@@ -156,6 +156,10 @@ Bug Fixes
outside BMP because it encoded UTF-16 chars instead of codepoints.
The escaping of codepoints > 127 was removed (not needed for valid HTML)
and missing escaping for ' and / was added. (Uwe Schindler)
+
+* LUCENE-5201: Fixed compression bug in LZ4.compressHC when the input is highly
+ compressible and the start offset of the array to compress is > 0.
+ (Adrien Grand)
API Changes
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java?rev=1520268&r1=1520267&r2=1520268&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java Thu Sep 5 10:08:55 2013
@@ -295,7 +295,7 @@ final class LZ4 {
private int hashPointer(byte[] bytes, int off) {
final int v = readInt(bytes, off);
final int h = hashHC(v);
- return base + hashTable[h];
+ return hashTable[h];
}
private int next(int off) {
@@ -306,6 +306,7 @@ final class LZ4 {
final int v = readInt(bytes, off);
final int h = hashHC(v);
int delta = off - hashTable[h];
+ assert delta > 0 : delta;
if (delta >= MAX_DISTANCE) {
delta = MAX_DISTANCE - 1;
}
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java?rev=1520268&r1=1520267&r2=1520268&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java Thu Sep 5 10:08:55 2013
@@ -50,15 +50,15 @@ public abstract class AbstractTestCompre
return arr;
}
- byte[] compress(byte[] decompressed) throws IOException {
+ byte[] compress(byte[] decompressed, int off, int len) throws IOException {
Compressor compressor = mode.newCompressor();
- return compress(compressor, decompressed);
+ return compress(compressor, decompressed, off, len);
}
- static byte[] compress(Compressor compressor, byte[] decompressed) throws IOException {
- byte[] compressed = new byte[decompressed.length * 2 + 16]; // should be enough
+ static byte[] compress(Compressor compressor, byte[] decompressed, int off, int len) throws IOException {
+ byte[] compressed = new byte[len * 2 + 16]; // should be enough
ByteArrayDataOutput out = new ByteArrayDataOutput(compressed);
- compressor.compress(decompressed, 0, decompressed.length, out);
+ compressor.compress(decompressed, off, len, out);
final int compressedLen = out.getPosition();
return Arrays.copyOf(compressed, compressedLen);
}
@@ -85,9 +85,11 @@ public abstract class AbstractTestCompre
final int iterations = atLeast(10);
for (int i = 0; i < iterations; ++i) {
final byte[] decompressed = randomArray();
- final byte[] compressed = compress(decompressed);
- final byte[] restored = decompress(compressed, decompressed.length);
- assertArrayEquals(decompressed, restored);
+ final int off = random().nextBoolean() ? 0 : _TestUtil.nextInt(random(), 0, decompressed.length);
+ final int len = random().nextBoolean() ? decompressed.length - off : _TestUtil.nextInt(random(), 0, decompressed.length - off);
+ final byte[] compressed = compress(decompressed, off, len);
+ final byte[] restored = decompress(compressed, len);
+ assertArrayEquals(Arrays.copyOfRange(decompressed, off, off+len), restored);
}
}
@@ -95,7 +97,7 @@ public abstract class AbstractTestCompre
final int iterations = atLeast(10);
for (int i = 0; i < iterations; ++i) {
final byte[] decompressed = randomArray();
- final byte[] compressed = compress(decompressed);
+ final byte[] compressed = compress(decompressed, 0, decompressed.length);
final int offset, length;
if (decompressed.length == 0) {
offset = length = 0;
@@ -109,9 +111,13 @@ public abstract class AbstractTestCompre
}
public byte[] test(byte[] decompressed) throws IOException {
- final byte[] compressed = compress(decompressed);
- final byte[] restored = decompress(compressed, decompressed.length);
- assertEquals(decompressed.length, restored.length);
+ return test(decompressed, 0, decompressed.length);
+ }
+
+ public byte[] test(byte[] decompressed, int off, int len) throws IOException {
+ final byte[] compressed = compress(decompressed, off, len);
+ final byte[] restored = decompress(compressed, len);
+ assertEquals(len, restored.length);
return compressed;
}
@@ -137,4 +143,58 @@ public abstract class AbstractTestCompre
test(decompressed);
}
+ public void testLUCENE5201() throws IOException {
+ byte[] data = new byte[]{
+ 14, 72, 14, 85, 3, 72, 14, 85, 3, 72, 14, 72, 14, 72, 14, 85, 3, 72, 14, 72, 14, 72, 14, 72, 14, 72, 14, 72, 14, 85, 3, 72,
+ 14, 85, 3, 72, 14, 85, 3, 72, 14, 85, 3, 72, 14, 85, 3, 72, 14, 85, 3, 72, 14, 50, 64, 0, 46, -1, 0, 0, 0, 29, 3, 85,
+ 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3,
+ 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113,
+ 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113,
+ 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 50, 64, 0, 47, -105, 0, 0, 0, 30, 3, -97, 6, 0, 68, -113,
+ 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85,
+ 8, -113, 0, 68, -97, 3, 0, 2, -97, 6, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97,
+ 6, 0, 68, -113, 0, 120, 64, 0, 48, 4, 0, 0, 0, 31, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72,
+ 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72,
+ 43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72,
+ 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72,
+ 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72,
+ 41, 72, 32, 72, 18, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 39, 24, 32, 34, 124, 0, 120, 64, 0, 48, 80, 0, 0, 0, 31, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72,
+ 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72,
+ 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72,
+ 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72,
+ 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72,
+ 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72,
+ 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72,
+ 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72,
+ 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72,
+ 22, 72, 31, 72, 43, 72, 19, 72, 34, 72, 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72,
+ 38, 72, 26, 72, 28, 72, 42, 72, 24, 72, 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 72, 34, 72,
+ 29, 72, 37, 72, 35, 72, 45, 72, 23, 72, 46, 72, 20, 72, 40, 72, 33, 72, 25, 72, 39, 72, 38, 72, 26, 72, 28, 72, 42, 72, 24, 72,
+ 27, 72, 36, 72, 41, 72, 32, 72, 18, 72, 30, 72, 22, 72, 31, 72, 43, 72, 19, 50, 64, 0, 49, 20, 0, 0, 0, 32, 3, -97, 6, 0,
+ 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97,
+ 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2,
+ 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2,
+ 3, -97, 6, 0, 50, 64, 0, 50, 53, 0, 0, 0, 34, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -113, 0, 2, 3, -97,
+ 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3,
+ -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97,
+ 3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3,
+ 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0,
+ 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3,
+ -97, 6, 0, 50, 64, 0, 51, 85, 0, 0, 0, 36, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97,
+ 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, -97, 5, 0, 2, 3, 85, 8, -113, 0, 68,
+ -97, 3, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0,
+ 68, -113, 0, 2, 3, -97, 6, 0, 50, -64, 0, 51, -45, 0, 0, 0, 37, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6,
+ 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, -97, 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -113, 0, 2, 3, -97,
+ 6, 0, 68, -113, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 2, 3, 85, 8, -113, 0, 68, -97, 3, 0, 120, 64, 0, 52, -88, 0, 0,
+ 0, 39, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72,
+ 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85,
+ 5, 72, 13, 85, 5, 72, 13, 72, 13, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85,
+ 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85,
+ 5, 72, 13, 85, 5, 72, 13, 72, 13, 72, 13, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 85, 5, 72, 13, 72, 13, 85, 5, 72, 13, 72,
+ 13, 85, 5, 72, 13, 72, 13, 85, 5, 72, 13, -19, -24, -101, -35
+ };
+ test(data, 9, data.length - 9);
+ }
+
}
\ No newline at end of file
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java?rev=1520268&r1=1520267&r2=1520268&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingStoredFieldsFormat.java Thu Sep 5 10:08:55 2013
@@ -32,8 +32,10 @@ import org.apache.lucene.index.RandomInd
import org.apache.lucene.store.Directory;
import org.junit.Test;
+import com.carrotsearch.randomizedtesting.annotations.Repeat;
import com.carrotsearch.randomizedtesting.generators.RandomInts;
+@Repeat(iterations=5) // give it a chance to test various compression modes with different chunk sizes
public class TestCompressingStoredFieldsFormat extends BaseStoredFieldsFormatTestCase {
@Override
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingTermVectorsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingTermVectorsFormat.java?rev=1520268&r1=1520267&r2=1520268&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingTermVectorsFormat.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingTermVectorsFormat.java Thu Sep 5 10:08:55 2013
@@ -14,6 +14,8 @@ import org.apache.lucene.index.TermsEnum
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
+import com.carrotsearch.randomizedtesting.annotations.Repeat;
+
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -31,6 +33,7 @@ import org.apache.lucene.util.BytesRef;
* limitations under the License.
*/
+@Repeat(iterations=5) // give it a chance to test various compression modes with different chunk sizes
public class TestCompressingTermVectorsFormat extends BaseTermVectorsFormatTestCase {
@Override
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestFastDecompressionMode.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestFastDecompressionMode.java?rev=1520268&r1=1520267&r2=1520268&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestFastDecompressionMode.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestFastDecompressionMode.java Thu Sep 5 10:08:55 2013
@@ -28,9 +28,9 @@ public class TestFastDecompressionMode e
}
@Override
- public byte[] test(byte[] decompressed) throws IOException {
- final byte[] compressed = super.test(decompressed);
- final byte[] compressed2 = compress(CompressionMode.FAST.newCompressor(), decompressed);
+ public byte[] test(byte[] decompressed, int off, int len) throws IOException {
+ final byte[] compressed = super.test(decompressed, off, len);
+ final byte[] compressed2 = compress(CompressionMode.FAST.newCompressor(), decompressed, off, len);
// because of the way this compression mode works, its output is necessarily
// smaller than the output of CompressionMode.FAST
assertTrue(compressed.length <= compressed2.length);