You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2020/09/03 15:05:19 UTC

[lucene-solr] branch branch_8x updated: LUCENE-9500: Separate the Deflater hack from the Lucene code to a subclass of java.util.zip.Deflater (#1824)

This is an automated email from the ASF dual-hosted git repository.

uschindler pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new d031f8e  LUCENE-9500: Separate the Deflater hack from the Lucene code to a subclass of java.util.zip.Deflater (#1824)
d031f8e is described below

commit d031f8e7bd62eb3a894cf436f81d398e3d7db493
Author: Uwe Schindler <us...@apache.org>
AuthorDate: Thu Sep 3 17:00:26 2020 +0200

    LUCENE-9500: Separate the Deflater hack from the Lucene code to a subclass of java.util.zip.Deflater (#1824)
---
 .../codecs/lucene87/BugfixDeflater_JDK8252739.java | 112 +++++++++++++++++++++
 .../DeflateWithPresetDictCompressionMode.java      |  13 +--
 ...tLucene87StoredFieldsFormatHighCompression.java |   4 +
 3 files changed, 121 insertions(+), 8 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene87/BugfixDeflater_JDK8252739.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene87/BugfixDeflater_JDK8252739.java
new file mode 100644
index 0000000..ae5b901
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene87/BugfixDeflater_JDK8252739.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene87;
+
+import java.util.Arrays;
+import java.util.zip.DataFormatException;
+import java.util.zip.Deflater;
+import java.util.zip.Inflater;
+
+/**
+ * This class is a workaround for JDK bug
+ * <a href="https://bugs.openjdk.java.net/browse/JDK-8252739">JDK-8252739</a>.
+ */
+final class BugfixDeflater_JDK8252739 extends Deflater {
+  
+  public static final boolean IS_BUGGY_JDK = detectBuggyJDK();
+
+  /**
+   * Creates a {@link Deflater} instance, which works around JDK-8252739.
+   * <p>
+   * Use this whenever you intend to call {@link #setDictionary(byte[], int, int)} or
+   * {@link #setDictionary(java.nio.ByteBuffer)} on a {@code Deflater}.
+   * */
+  public static Deflater createDeflaterInstance(int level, boolean nowrap, int dictLength) {
+    if (dictLength < 0) {
+      throw new IllegalArgumentException("dictLength must be >= 0");
+    }
+    if (IS_BUGGY_JDK) {
+      return new BugfixDeflater_JDK8252739(level, nowrap, dictLength);
+    } else {
+      return new Deflater(level, nowrap);
+    }
+  }
+  
+  
+  private final byte[] dictBytesScratch;
+
+  private BugfixDeflater_JDK8252739(int level, boolean nowrap, int dictLength) {
+    super(level, nowrap);
+    this.dictBytesScratch = new byte[dictLength];
+  }
+  
+  @Override
+  public void setDictionary(byte[] dictBytes, int off, int len) {
+    if (off > 0) {
+      System.arraycopy(dictBytes, off, dictBytesScratch, 0, len);
+      super.setDictionary(dictBytesScratch, 0, len);
+    } else {
+      super.setDictionary(dictBytes, off, len);
+    }
+  }
+
+  private static boolean detectBuggyJDK() {
+    final byte[] testData = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8 };
+    final byte[] compressed = new byte[32]; // more than enough space
+    final Deflater deflater = new Deflater(6, true);
+    int compressedSize;
+    try {
+      deflater.reset();
+      deflater.setDictionary(testData, 4, 4);
+      deflater.setInput(testData);
+      deflater.finish();
+      compressedSize = deflater.deflate(compressed, 0, compressed.length, Deflater.FULL_FLUSH);
+    } finally {
+      deflater.end();
+    }
+    
+    // in nowrap mode the Inflater needs an extra 0-byte as padding, so add it explicitly:
+    compressed[compressedSize] = 0;
+    compressedSize++;
+    
+    final Inflater inflater = new Inflater(true);
+    final byte[] restored = new byte[testData.length];
+    try {
+      inflater.reset();
+      inflater.setDictionary(testData, 4, 4);
+      inflater.setInput(compressed, 0, compressedSize);
+      final int restoredLength = inflater.inflate(restored);
+      if (restoredLength != testData.length) {
+        return true;
+      }
+    } catch (DataFormatException e) {
+      return true;
+    } catch(RuntimeException e) {
+      return true;
+    } finally {
+      inflater.end();
+    }
+
+    if (Arrays.equals(testData, restored) == false) {
+      return true;
+    }
+    
+    // all fine
+    return false;
+  }
+  
+}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene87/DeflateWithPresetDictCompressionMode.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene87/DeflateWithPresetDictCompressionMode.java
index 09d52ad..91c1480 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene87/DeflateWithPresetDictCompressionMode.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene87/DeflateWithPresetDictCompressionMode.java
@@ -155,16 +155,15 @@ public final class DeflateWithPresetDictCompressionMode extends CompressionMode
 
   private static class DeflateWithPresetDictCompressor extends Compressor {
 
-    final byte[] dictBytes;
-    final int blockLength;
+    private final int dictLength, blockLength;
     final Deflater compressor;
     byte[] compressed;
     boolean closed;
 
     DeflateWithPresetDictCompressor(int level, int dictLength, int blockLength) {
-      compressor = new Deflater(level, true);
+      compressor = BugfixDeflater_JDK8252739.createDeflaterInstance(level, true, dictLength);
       compressed = new byte[64];
-      this.dictBytes = new byte[dictLength];
+      this.dictLength = dictLength;
       this.blockLength = blockLength;
     }
 
@@ -197,8 +196,7 @@ public final class DeflateWithPresetDictCompressionMode extends CompressionMode
 
     @Override
     public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException {
-      final int dictLength = Math.min(dictBytes.length, len);
-      System.arraycopy(bytes, off, dictBytes, 0, dictLength);
+      final int dictLength = Math.min(this.dictLength, len);
       out.writeVInt(dictLength);
       out.writeVInt(blockLength);
       final int end = off + len;
@@ -210,8 +208,7 @@ public final class DeflateWithPresetDictCompressionMode extends CompressionMode
       // And then sub blocks
       for (int start = off + dictLength; start < end; start += blockLength) {
         compressor.reset();
-        // NOTE: offset MUST be 0 when setting the dictionary in order to work around JDK-8252739
-        compressor.setDictionary(dictBytes, 0, dictLength);
+        compressor.setDictionary(bytes, off, dictLength);
         doCompress(bytes, start, Math.min(blockLength, off + len - start), out);
       }
     }
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene87/TestLucene87StoredFieldsFormatHighCompression.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene87/TestLucene87StoredFieldsFormatHighCompression.java
index f4ebca6..b6dc5a5 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene87/TestLucene87StoredFieldsFormatHighCompression.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene87/TestLucene87StoredFieldsFormatHighCompression.java
@@ -77,4 +77,8 @@ public class TestLucene87StoredFieldsFormatHighCompression extends BaseStoredFie
       new Lucene87StoredFieldsFormat(null);
     });
   }
+  
+  public void testShowJDKBugStatus() {
+    System.err.println("JDK is buggy (JDK-8252739): " + BugfixDeflater_JDK8252739.IS_BUGGY_JDK);
+  }
 }