You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by br...@apache.org on 2023/03/30 11:46:01 UTC

[solr] branch main updated: SOLR-16706: Reduce the ZLibCompressor compression buffer size. (#1469)

This is an automated email from the ASF dual-hosted git repository.

broustant pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new 34ea32b877b SOLR-16706: Reduce the ZLibCompressor compression buffer size. (#1469)
34ea32b877b is described below

commit 34ea32b877b5e865528425e44d2a4bd6ba55cc84
Author: Bruno Roustant <33...@users.noreply.github.com>
AuthorDate: Thu Mar 30 13:45:53 2023 +0200

    SOLR-16706: Reduce the ZLibCompressor compression buffer size. (#1469)
---
 .../core/src/java/org/apache/solr/cloud/ZkCLI.java |  4 ++--
 .../apache/solr/cloud/overseer/ZkStateWriter.java  |  3 ++-
 .../src/test/org/apache/solr/cloud/ZkCLITest.java  | 24 ++++++++++++++++++----
 .../org/apache/solr/common/util/Compressor.java    | 11 ++++++++++
 .../apache/solr/common/util/ZLibCompressor.java    |  9 +++++++-
 .../solr/common/util/ZLibCompressorTest.java       |  7 +++++--
 6 files changed, 48 insertions(+), 10 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java b/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java
index 11333809a70..2147639d4f3 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java
@@ -401,7 +401,7 @@ public class ZkCLI implements CLIO {
           byte[] data = arglist.get(1).getBytes(StandardCharsets.UTF_8);
           if (shouldCompressData(data, path, minStateByteLenForCompression)) {
             // state.json should be compressed before being put to ZK
-            data = compressor.compressBytes(data);
+            data = compressor.compressBytes(data, data.length / 10);
           }
           if (zkClient.exists(path, true)) {
             zkClient.setData(path, data, true);
@@ -422,7 +422,7 @@ public class ZkCLI implements CLIO {
           byte[] data = Files.readAllBytes(Path.of(arglist.get(1)));
           if (shouldCompressData(data, path, minStateByteLenForCompression)) {
             // state.json should be compressed before being put to ZK
-            data = compressor.compressBytes(data);
+            data = compressor.compressBytes(data, data.length / 10);
           }
           if (zkClient.exists(path, true)) {
             zkClient.setData(path, data, true);
diff --git a/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java b/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java
index b3ad7bf61fb..9e457f53f89 100644
--- a/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java
+++ b/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java
@@ -286,7 +286,8 @@ public class ZkStateWriter {
           } else {
             byte[] data = Utils.toJSON(singletonMap(c.getName(), c));
             if (minStateByteLenForCompression > -1 && data.length > minStateByteLenForCompression) {
-              data = compressor.compressBytes(data);
+              // When compressing state.json, we expect at least a 10:1 compression ratio.
+              data = compressor.compressBytes(data, data.length / 10);
             }
             if (reader.getZkClient().exists(path, true)) {
               if (log.isDebugEnabled()) {
diff --git a/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java b/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java
index 20f0a8a2006..94258f6db31 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java
@@ -182,7 +182,11 @@ public class ZkCLITest extends SolrTestCaseJ4 {
 
     String data = "my data";
     ZLibCompressor zLibCompressor = new ZLibCompressor();
-    byte[] expected = zLibCompressor.compressBytes(data.getBytes(StandardCharsets.UTF_8));
+    byte[] dataBytes = data.getBytes(StandardCharsets.UTF_8);
+    byte[] expected =
+        random().nextBoolean()
+            ? zLibCompressor.compressBytes(dataBytes)
+            : zLibCompressor.compressBytes(dataBytes, dataBytes.length / 10);
     String[] args =
         new String[] {"-zkhost", zkServer.getZkAddress(), "-cmd", "put", "/state.json", data};
     ZkCLI.main(args);
@@ -190,7 +194,11 @@ public class ZkCLITest extends SolrTestCaseJ4 {
 
     // test re-put to existing
     data = "my data deux";
-    expected = zLibCompressor.compressBytes(data.getBytes(StandardCharsets.UTF_8));
+    dataBytes = data.getBytes(StandardCharsets.UTF_8);
+    expected =
+        random().nextBoolean()
+            ? zLibCompressor.compressBytes(dataBytes)
+            : zLibCompressor.compressBytes(dataBytes, dataBytes.length / 10);
     args = new String[] {"-zkhost", zkServer.getZkAddress(), "-cmd", "put", "/state.json", data};
     ZkCLI.main(args);
     assertArrayEquals(zkClient.getZooKeeper().getData("/state.json", null, null), expected);
@@ -442,7 +450,11 @@ public class ZkCLITest extends SolrTestCaseJ4 {
     byte[] data = "getNode-data".getBytes(StandardCharsets.UTF_8);
     ZLibCompressor zLibCompressor = new ZLibCompressor();
     ByteArrayOutputStream systemOut = new ByteArrayOutputStream();
-    this.zkClient.create(getNode, zLibCompressor.compressBytes(data), CreateMode.PERSISTENT, true);
+    byte[] compressedData =
+        random().nextBoolean()
+            ? zLibCompressor.compressBytes(data)
+            : zLibCompressor.compressBytes(data, data.length / 10);
+    this.zkClient.create(getNode, compressedData, CreateMode.PERSISTENT, true);
     String[] args = new String[] {"-zkhost", zkServer.getZkAddress(), "-cmd", "get", getNode};
     ZkCLI.setStdout(new PrintStream(systemOut, true, StandardCharsets.UTF_8));
     ZkCLI.main(args);
@@ -483,7 +495,11 @@ public class ZkCLITest extends SolrTestCaseJ4 {
     String getNode = "/getFileNode";
     byte[] data = "getFileNode-data".getBytes(StandardCharsets.UTF_8);
     ZLibCompressor zLibCompressor = new ZLibCompressor();
-    this.zkClient.create(getNode, zLibCompressor.compressBytes(data), CreateMode.PERSISTENT, true);
+    byte[] compressedData =
+        random().nextBoolean()
+            ? zLibCompressor.compressBytes(data)
+            : zLibCompressor.compressBytes(data, data.length / 10);
+    this.zkClient.create(getNode, compressedData, CreateMode.PERSISTENT, true);
 
     Path file =
         tmpDir.resolve("solrtest-getfile-" + this.getClass().getName() + "-" + System.nanoTime());
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/Compressor.java b/solr/solrj/src/java/org/apache/solr/common/util/Compressor.java
index 26903b4cf19..a1b5eb5124f 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/Compressor.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/Compressor.java
@@ -45,4 +45,15 @@ public interface Compressor {
    * @return compressed bytes
    */
   byte[] compressBytes(byte[] data);
+
+  /**
+   * Compresses bytes using the compression implementation and a caller-sized initial buffer.
+   *
+   * @param data the input uncompressed data to be compressed
+   * @param initialBufferCapacity the initial capacity of the buffer storing the compressed data.
+   *     Callers that know the expected compression ratio for their data type can size it
+   *     accordingly. A value smaller than 16 is treated as 16.
+   * @return compressed bytes
+   */
+  byte[] compressBytes(byte[] data, int initialBufferCapacity);
 }
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/ZLibCompressor.java b/solr/solrj/src/java/org/apache/solr/common/util/ZLibCompressor.java
index 61fcd3714a3..5cbdf5fc934 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/ZLibCompressor.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/ZLibCompressor.java
@@ -73,11 +73,18 @@ public class ZLibCompressor implements Compressor {
 
   @Override
   public byte[] compressBytes(byte[] data) {
+    // By default, the compression ratio is assumed to be 5:1 to set the initial capacity of the
+    // compression buffer.
+    return compressBytes(data, data.length / 5);
+  }
+
+  @Override
+  public byte[] compressBytes(byte[] data, int initialBufferCapacity) {
     Deflater compressor = new Deflater(Deflater.BEST_SPEED);
     try {
       compressor.setInput(data);
       compressor.finish();
-      byte[] buf = new byte[data.length + 8];
+      byte[] buf = new byte[Math.max(initialBufferCapacity, 16)];
       int compressedSize = 0;
       while (!compressor.finished()) {
         if (compressedSize >= buf.length) {
diff --git a/solr/solrj/src/test/org/apache/solr/common/util/ZLibCompressorTest.java b/solr/solrj/src/test/org/apache/solr/common/util/ZLibCompressorTest.java
index be2d2975c0d..d4b2dc6d53f 100644
--- a/solr/solrj/src/test/org/apache/solr/common/util/ZLibCompressorTest.java
+++ b/solr/solrj/src/test/org/apache/solr/common/util/ZLibCompressorTest.java
@@ -65,12 +65,15 @@ public class ZLibCompressorTest extends SolrTestCase {
           120, 1, 11, -50, -49, 77, 85, 40, 73, 45, 46, 81, 72, 73, 44, 73, -28, 2, 0, 43, -36, 5,
           57
         };
+    byte[] data = "Some test data\n".getBytes(StandardCharsets.UTF_8);
     byte[] compressedBytes =
-        stateCompression.compressBytes("Some test data\n".getBytes(StandardCharsets.UTF_8));
+        random().nextBoolean()
+            ? stateCompression.compressBytes(data)
+            : stateCompression.compressBytes(data, data.length / 10);
     int decompressedSize = ByteBuffer.wrap(compressedBytes, compressedBytes.length - 8, 4).getInt();
     int xoredSize = ByteBuffer.wrap(compressedBytes, compressedBytes.length - 4, 4).getInt();
     assertEquals(xoredSize, decompressedSize ^ 2018370979);
-    assertEquals("Some test data\n".getBytes(StandardCharsets.UTF_8).length, decompressedSize);
+    assertEquals(data.length, decompressedSize);
     assertArrayEquals(
         testBytes, ArrayUtil.copyOfSubArray(compressedBytes, 0, compressedBytes.length - 8));
   }