You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2020/09/03 15:05:19 UTC
[lucene-solr] branch branch_8x updated: LUCENE-9500: Separate the
Deflater hack from the Lucene code to a subclass of java.util.zip.Deflater
(#1824)
This is an automated email from the ASF dual-hosted git repository.
uschindler pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8x by this push:
new d031f8e LUCENE-9500: Separate the Deflater hack from the Lucene code to a subclass of java.util.zip.Deflater (#1824)
d031f8e is described below
commit d031f8e7bd62eb3a894cf436f81d398e3d7db493
Author: Uwe Schindler <us...@apache.org>
AuthorDate: Thu Sep 3 17:00:26 2020 +0200
LUCENE-9500: Separate the Deflater hack from the Lucene code to a subclass of java.util.zip.Deflater (#1824)
---
.../codecs/lucene87/BugfixDeflater_JDK8252739.java | 112 +++++++++++++++++++++
.../DeflateWithPresetDictCompressionMode.java | 13 +--
...tLucene87StoredFieldsFormatHighCompression.java | 4 +
3 files changed, 121 insertions(+), 8 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene87/BugfixDeflater_JDK8252739.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene87/BugfixDeflater_JDK8252739.java
new file mode 100644
index 0000000..ae5b901
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene87/BugfixDeflater_JDK8252739.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene87;
+
+import java.util.Arrays;
+import java.util.zip.DataFormatException;
+import java.util.zip.Deflater;
+import java.util.zip.Inflater;
+
+/**
+ * A {@link Deflater} that works around JDK bug <a href="https://bugs.openjdk.java.net/browse/JDK-8252739">JDK-8252739</a>:
+ * a dictionary given at an offset &gt; 0 is copied to the start of a scratch array before being handed to the superclass.
+ */
+final class BugfixDeflater_JDK8252739 extends Deflater {
+
+ public static final boolean IS_BUGGY_JDK = detectBuggyJDK(); // computed once, when this class is initialized
+
+ /**
+ * Creates a {@link Deflater} instance, which works around JDK-8252739: an instance of this class (with a
+ * {@code dictLength}-byte scratch array) if {@link #IS_BUGGY_JDK} is set, otherwise a plain {@code Deflater}.
+ * Use it whenever you intend to call {@link #setDictionary(byte[], int, int)}; the {@code java.nio.ByteBuffer}
+ * overload is not overridden by this class (NOTE(review): confirm it needs no workaround).
+ * @throws IllegalArgumentException if {@code dictLength} is negative */
+ public static Deflater createDeflaterInstance(int level, boolean nowrap, int dictLength) {
+ if (dictLength < 0) {
+ throw new IllegalArgumentException("dictLength must be >= 0");
+ }
+ if (IS_BUGGY_JDK) {
+ return new BugfixDeflater_JDK8252739(level, nowrap, dictLength);
+ } else {
+ return new Deflater(level, nowrap); // dictLength is not needed on this path
+ }
+ }
+
+
+ private final byte[] dictBytesScratch; // copy target used by setDictionary; allocated once with dictLength bytes
+
+ private BugfixDeflater_JDK8252739(int level, boolean nowrap, int dictLength) { // private: instances come from createDeflaterInstance
+ super(level, nowrap);
+ this.dictBytesScratch = new byte[dictLength];
+ }
+
+ @Override // moves a dictionary that starts at off > 0 to index 0 of the scratch array before delegating
+ public void setDictionary(byte[] dictBytes, int off, int len) {
+ if (off > 0) {
+ System.arraycopy(dictBytes, off, dictBytesScratch, 0, len); // NOTE(review): len is not checked against dictBytesScratch.length here
+ super.setDictionary(dictBytesScratch, 0, len);
+ } else {
+ super.setDictionary(dictBytes, off, len); // off <= 0: arguments are forwarded unchanged
+ }
+ }
+
+ private static boolean detectBuggyJDK() { // true = the dictionary round trip below did not reproduce testData
+ final byte[] testData = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8 };
+ final byte[] compressed = new byte[32]; // ample space for the compressed form of testData
+ final Deflater deflater = new Deflater(6, true); // a plain Deflater, not this subclass
+ int compressedSize;
+ try { // try/finally only: an exception while deflating is not caught here
+ deflater.reset();
+ deflater.setDictionary(testData, 4, 4); // dictionary = last 4 bytes of testData, i.e. a non-zero offset
+ deflater.setInput(testData);
+ deflater.finish();
+ compressedSize = deflater.deflate(compressed, 0, compressed.length, Deflater.FULL_FLUSH);
+ } finally {
+ deflater.end();
+ }
+
+ // in nowrap mode an extra 0-byte is needed as padding; append it explicitly:
+ compressed[compressedSize] = 0;
+ compressedSize++;
+
+ final Inflater inflater = new Inflater(true);
+ final byte[] restored = new byte[testData.length];
+ try {
+ inflater.reset();
+ inflater.setDictionary(testData, 4, 4); // same arguments as on the deflate side
+ inflater.setInput(compressed, 0, compressedSize);
+ final int restoredLength = inflater.inflate(restored);
+ if (restoredLength != testData.length) {
+ return true; // wrong number of bytes came back
+ }
+ } catch (DataFormatException e) {
+ return true; // inflating failed, reported as buggy
+ } catch(RuntimeException e) {
+ return true; // likewise for any runtime exception while inflating
+ } finally {
+ inflater.end();
+ }
+
+ if (Arrays.equals(testData, restored) == false) {
+ return true; // right length, but the content differs
+ }
+
+ // all fine
+ return false;
+ }
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene87/DeflateWithPresetDictCompressionMode.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene87/DeflateWithPresetDictCompressionMode.java
index 09d52ad..91c1480 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene87/DeflateWithPresetDictCompressionMode.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene87/DeflateWithPresetDictCompressionMode.java
@@ -155,16 +155,15 @@ public final class DeflateWithPresetDictCompressionMode extends CompressionMode
private static class DeflateWithPresetDictCompressor extends Compressor {
- final byte[] dictBytes;
- final int blockLength;
+ private final int dictLength, blockLength;
final Deflater compressor;
byte[] compressed;
boolean closed;
DeflateWithPresetDictCompressor(int level, int dictLength, int blockLength) {
- compressor = new Deflater(level, true);
+ compressor = BugfixDeflater_JDK8252739.createDeflaterInstance(level, true, dictLength);
compressed = new byte[64];
- this.dictBytes = new byte[dictLength];
+ this.dictLength = dictLength;
this.blockLength = blockLength;
}
@@ -197,8 +196,7 @@ public final class DeflateWithPresetDictCompressionMode extends CompressionMode
@Override
public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException {
- final int dictLength = Math.min(dictBytes.length, len);
- System.arraycopy(bytes, off, dictBytes, 0, dictLength);
+ final int dictLength = Math.min(this.dictLength, len);
out.writeVInt(dictLength);
out.writeVInt(blockLength);
final int end = off + len;
@@ -210,8 +208,7 @@ public final class DeflateWithPresetDictCompressionMode extends CompressionMode
// And then sub blocks
for (int start = off + dictLength; start < end; start += blockLength) {
compressor.reset();
- // NOTE: offset MUST be 0 when setting the dictionary in order to work around JDK-8252739
- compressor.setDictionary(dictBytes, 0, dictLength);
+ compressor.setDictionary(bytes, off, dictLength);
doCompress(bytes, start, Math.min(blockLength, off + len - start), out);
}
}
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene87/TestLucene87StoredFieldsFormatHighCompression.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene87/TestLucene87StoredFieldsFormatHighCompression.java
index f4ebca6..b6dc5a5 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene87/TestLucene87StoredFieldsFormatHighCompression.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene87/TestLucene87StoredFieldsFormatHighCompression.java
@@ -77,4 +77,8 @@ public class TestLucene87StoredFieldsFormatHighCompression extends BaseStoredFie
new Lucene87StoredFieldsFormat(null);
});
}
+
+ public void testShowJDKBugStatus() { // informational only: prints the detection result to stderr, asserts nothing
+ System.err.println("JDK is buggy (JDK-8252739): " + BugfixDeflater_JDK8252739.IS_BUGGY_JDK);
+ }
}