You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2013/09/04 18:11:36 UTC
svn commit: r1520060 - in /lucene/dev/trunk/lucene/core/src:
java/org/apache/lucene/codecs/compressing/
test/org/apache/lucene/codecs/compressing/
Author: jpountz
Date: Wed Sep 4 16:11:36 2013
New Revision: 1520060
URL: http://svn.apache.org/r1520060
Log:
Fix compression bug on highly compressible inputs with LZ4.compressHC.
Modified:
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestLZ4CompressionMode.java
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java?rev=1520060&r1=1520059&r2=1520060&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java Wed Sep 4 16:11:36 2013
@@ -219,7 +219,7 @@ final class LZ4 {
final PackedInts.Mutable hashTable = ht.hashTable;
main:
- while (off < limit) {
+ while (off <= limit) {
// find a match
int ref;
while (true) {
@@ -299,7 +299,7 @@ final class LZ4 {
}
private int next(int off) {
- return base + off - (chainTable[off & MASK] & 0xFFFF);
+ return off - (chainTable[off & MASK] & 0xFFFF);
}
private void addHash(byte[] bytes, int off) {
@@ -310,7 +310,7 @@ final class LZ4 {
delta = MAX_DISTANCE - 1;
}
chainTable[off & MASK] = (short) delta;
- hashTable[h] = off - base;
+ hashTable[h] = off;
}
void insert(int off, byte[] bytes) {
@@ -322,12 +322,24 @@ final class LZ4 {
boolean insertAndFindBestMatch(byte[] buf, int off, int matchLimit, Match match) {
match.start = off;
match.len = 0;
+ int delta = 0;
+ int repl = 0;
insert(off, buf);
int ref = hashPointer(buf, off);
+
+ if (ref >= off - 4 && ref <= off && ref >= base) { // potential repetition
+ if (readIntEquals(buf, ref, off)) { // confirmed
+ delta = off - ref;
+ repl = match.len = MIN_MATCH + commonBytes(buf, ref + MIN_MATCH, off + MIN_MATCH, matchLimit);
+ match.ref = ref;
+ }
+ ref = next(ref);
+ }
+
for (int i = 0; i < MAX_ATTEMPTS; ++i) {
- if (ref < Math.max(base, off - MAX_DISTANCE + 1)) {
+ if (ref < Math.max(base, off - MAX_DISTANCE + 1) || ref > off) {
break;
}
if (buf[ref + match.len] == buf[off + match.len] && readIntEquals(buf, ref, off)) {
@@ -340,6 +352,21 @@ final class LZ4 {
ref = next(ref);
}
+ if (repl != 0) {
+ int ptr = off;
+ final int end = off + repl - (MIN_MATCH - 1);
+ while (ptr < end - delta) {
+ chainTable[ptr & MASK] = (short) delta; // pre load
+ ++ptr;
+ }
+ do {
+ chainTable[ptr & MASK] = (short) delta;
+ hashTable[hashHC(readInt(buf, ptr))] = ptr;
+ ++ptr;
+ } while (ptr < end);
+ nextToUpdate = end;
+ }
+
return match.len != 0;
}
@@ -351,7 +378,7 @@ final class LZ4 {
final int delta = off - startLimit;
int ref = hashPointer(buf, off);
for (int i = 0; i < MAX_ATTEMPTS; ++i) {
- if (ref < Math.max(base, off - MAX_DISTANCE + 1)) {
+ if (ref < Math.max(base, off - MAX_DISTANCE + 1) || ref > off) {
break;
}
if (buf[ref - delta + match.len] == buf[startLimit + match.len]
@@ -386,6 +413,7 @@ final class LZ4 {
final int srcEnd = srcOff + srcLen;
final int matchLimit = srcEnd - LAST_LITERALS;
+ final int mfLimit = matchLimit - MIN_MATCH;
int sOff = srcOff;
int anchor = sOff++;
@@ -397,7 +425,7 @@ final class LZ4 {
final Match match3 = new Match();
main:
- while (sOff < matchLimit) {
+ while (sOff <= mfLimit) {
if (!ht.insertAndFindBestMatch(src, sOff, matchLimit, match1)) {
++sOff;
continue;
@@ -409,7 +437,7 @@ final class LZ4 {
search2:
while (true) {
assert match1.start >= anchor;
- if (match1.end() >= matchLimit
+ if (match1.end() >= mfLimit
|| !ht.insertAndFindWiderMatch(src, match1.end() - 2, match1.start + 1, matchLimit, match1.len, match2)) {
// no better match
encodeSequence(src, anchor, match1.ref, match1.start, match1.len, out);
@@ -445,24 +473,11 @@ final class LZ4 {
}
}
- if (match2.start + match2.len >= matchLimit
+ if (match2.start + match2.len >= mfLimit
|| !ht.insertAndFindWiderMatch(src, match2.end() - 3, match2.start, matchLimit, match2.len, match3)) {
// no better match -> 2 sequences to encode
if (match2.start < match1.end()) {
- if (match2.start - match1.start < OPTIMAL_ML) {
- if (match1.len > OPTIMAL_ML) {
- match1.len = OPTIMAL_ML;
- }
- if (match1.end() > match2.end() - MIN_MATCH) {
- match1.len = match2.end() - match1.start - MIN_MATCH;
- }
- final int correction = match1.len - (match2.start - match1.start);
- if (correction > 0) {
- match2.fix(correction);
- }
- } else {
- match1.len = match2.start - match1.start;
- }
+ match1.len = match2.start - match1.start;
}
// encode seq 1
encodeSequence(src, anchor, match1.ref, match1.start, match1.len, out);
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java?rev=1520060&r1=1520059&r2=1520060&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java Wed Sep 4 16:11:36 2013
@@ -24,6 +24,7 @@ import org.apache.lucene.store.ByteArray
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
import com.carrotsearch.randomizedtesting.generators.RandomInts;
@@ -130,4 +131,10 @@ public abstract class AbstractTestCompre
test(decompressed);
}
+ public void testConstant() throws IOException {
+ final byte[] decompressed = new byte[_TestUtil.nextInt(random(), 1, 10000)];
+ Arrays.fill(decompressed, (byte) random().nextInt());
+ test(decompressed);
+ }
+
}
\ No newline at end of file
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestLZ4CompressionMode.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestLZ4CompressionMode.java?rev=1520060&r1=1520059&r2=1520060&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestLZ4CompressionMode.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestLZ4CompressionMode.java Wed Sep 4 16:11:36 2013
@@ -104,4 +104,8 @@ public abstract class AbstractTestLZ4Com
test(decompressed);
}
+ public void testMatchRightBeforeLastLiterals() throws IOException {
+ test(new byte[] {1,2,3,4, 1,2,3,4, 1,2,3,4,5});
+ }
+
}