You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by br...@apache.org on 2023/03/30 11:46:01 UTC
[solr] branch main updated: SOLR-16706: Reduce the ZLibCompressor compression buffer size. (#1469)
This is an automated email from the ASF dual-hosted git repository.
broustant pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new 34ea32b877b SOLR-16706: Reduce the ZLibCompressor compression buffer size. (#1469)
34ea32b877b is described below
commit 34ea32b877b5e865528425e44d2a4bd6ba55cc84
Author: Bruno Roustant <33...@users.noreply.github.com>
AuthorDate: Thu Mar 30 13:45:53 2023 +0200
SOLR-16706: Reduce the ZLibCompressor compression buffer size. (#1469)
---
.../core/src/java/org/apache/solr/cloud/ZkCLI.java | 4 ++--
.../apache/solr/cloud/overseer/ZkStateWriter.java | 3 ++-
.../src/test/org/apache/solr/cloud/ZkCLITest.java | 24 ++++++++++++++++++----
.../org/apache/solr/common/util/Compressor.java | 11 ++++++++++
.../apache/solr/common/util/ZLibCompressor.java | 9 +++++++-
.../solr/common/util/ZLibCompressorTest.java | 7 +++++--
6 files changed, 48 insertions(+), 10 deletions(-)
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java b/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java
index 11333809a70..2147639d4f3 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java
@@ -401,7 +401,7 @@ public class ZkCLI implements CLIO {
byte[] data = arglist.get(1).getBytes(StandardCharsets.UTF_8);
if (shouldCompressData(data, path, minStateByteLenForCompression)) {
// state.json should be compressed before being put to ZK
- data = compressor.compressBytes(data);
+ data = compressor.compressBytes(data, data.length / 10);
}
if (zkClient.exists(path, true)) {
zkClient.setData(path, data, true);
@@ -422,7 +422,7 @@ public class ZkCLI implements CLIO {
byte[] data = Files.readAllBytes(Path.of(arglist.get(1)));
if (shouldCompressData(data, path, minStateByteLenForCompression)) {
// state.json should be compressed before being put to ZK
- data = compressor.compressBytes(data);
+ data = compressor.compressBytes(data, data.length / 10);
}
if (zkClient.exists(path, true)) {
zkClient.setData(path, data, true);
diff --git a/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java b/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java
index b3ad7bf61fb..9e457f53f89 100644
--- a/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java
+++ b/solr/core/src/java/org/apache/solr/cloud/overseer/ZkStateWriter.java
@@ -286,7 +286,8 @@ public class ZkStateWriter {
} else {
byte[] data = Utils.toJSON(singletonMap(c.getName(), c));
if (minStateByteLenForCompression > -1 && data.length > minStateByteLenForCompression) {
- data = compressor.compressBytes(data);
+ // When compressing state.json, we expect at least a 10:1 compression ratio.
+ data = compressor.compressBytes(data, data.length / 10);
}
if (reader.getZkClient().exists(path, true)) {
if (log.isDebugEnabled()) {
diff --git a/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java b/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java
index 20f0a8a2006..94258f6db31 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java
@@ -182,7 +182,11 @@ public class ZkCLITest extends SolrTestCaseJ4 {
String data = "my data";
ZLibCompressor zLibCompressor = new ZLibCompressor();
- byte[] expected = zLibCompressor.compressBytes(data.getBytes(StandardCharsets.UTF_8));
+ byte[] dataBytes = data.getBytes(StandardCharsets.UTF_8);
+ byte[] expected =
+ random().nextBoolean()
+ ? zLibCompressor.compressBytes(dataBytes)
+ : zLibCompressor.compressBytes(dataBytes, dataBytes.length / 10);
String[] args =
new String[] {"-zkhost", zkServer.getZkAddress(), "-cmd", "put", "/state.json", data};
ZkCLI.main(args);
@@ -190,7 +194,11 @@ public class ZkCLITest extends SolrTestCaseJ4 {
// test re-put to existing
data = "my data deux";
- expected = zLibCompressor.compressBytes(data.getBytes(StandardCharsets.UTF_8));
+ dataBytes = data.getBytes(StandardCharsets.UTF_8);
+ expected =
+ random().nextBoolean()
+ ? zLibCompressor.compressBytes(dataBytes)
+ : zLibCompressor.compressBytes(dataBytes, dataBytes.length / 10);
args = new String[] {"-zkhost", zkServer.getZkAddress(), "-cmd", "put", "/state.json", data};
ZkCLI.main(args);
assertArrayEquals(zkClient.getZooKeeper().getData("/state.json", null, null), expected);
@@ -442,7 +450,11 @@ public class ZkCLITest extends SolrTestCaseJ4 {
byte[] data = "getNode-data".getBytes(StandardCharsets.UTF_8);
ZLibCompressor zLibCompressor = new ZLibCompressor();
ByteArrayOutputStream systemOut = new ByteArrayOutputStream();
- this.zkClient.create(getNode, zLibCompressor.compressBytes(data), CreateMode.PERSISTENT, true);
+ byte[] compressedData =
+ random().nextBoolean()
+ ? zLibCompressor.compressBytes(data)
+ : zLibCompressor.compressBytes(data, data.length / 10);
+ this.zkClient.create(getNode, compressedData, CreateMode.PERSISTENT, true);
String[] args = new String[] {"-zkhost", zkServer.getZkAddress(), "-cmd", "get", getNode};
ZkCLI.setStdout(new PrintStream(systemOut, true, StandardCharsets.UTF_8));
ZkCLI.main(args);
@@ -483,7 +495,11 @@ public class ZkCLITest extends SolrTestCaseJ4 {
String getNode = "/getFileNode";
byte[] data = "getFileNode-data".getBytes(StandardCharsets.UTF_8);
ZLibCompressor zLibCompressor = new ZLibCompressor();
- this.zkClient.create(getNode, zLibCompressor.compressBytes(data), CreateMode.PERSISTENT, true);
+ byte[] compressedData =
+ random().nextBoolean()
+ ? zLibCompressor.compressBytes(data)
+ : zLibCompressor.compressBytes(data, data.length / 10);
+ this.zkClient.create(getNode, compressedData, CreateMode.PERSISTENT, true);
Path file =
tmpDir.resolve("solrtest-getfile-" + this.getClass().getName() + "-" + System.nanoTime());
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/Compressor.java b/solr/solrj/src/java/org/apache/solr/common/util/Compressor.java
index 26903b4cf19..a1b5eb5124f 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/Compressor.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/Compressor.java
@@ -45,4 +45,15 @@ public interface Compressor {
* @return compressed bytes
*/
byte[] compressBytes(byte[] data);
+
+ /**
+ * Compresses the given bytes, using a caller-provided initial capacity for the output buffer.
+ *
+ * @param data the input uncompressed data to be compressed
+ * @param initialBufferCapacity the initial capacity, in bytes, of the buffer storing the
+ * compressed data. A good value depends on the kind of data being compressed; the caller may
+ * know its expected average compression ratio. Values smaller than 16 are treated as 16.
+ * @return compressed bytes
+ */
+ byte[] compressBytes(byte[] data, int initialBufferCapacity);
}
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/ZLibCompressor.java b/solr/solrj/src/java/org/apache/solr/common/util/ZLibCompressor.java
index 61fcd3714a3..5cbdf5fc934 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/ZLibCompressor.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/ZLibCompressor.java
@@ -73,11 +73,18 @@ public class ZLibCompressor implements Compressor {
@Override
public byte[] compressBytes(byte[] data) {
+ // By default, the compression ratio is assumed to be 5:1 to set the initial capacity of the
+ // compression buffer.
+ return compressBytes(data, data.length / 5);
+ }
+
+ @Override
+ public byte[] compressBytes(byte[] data, int initialBufferCapacity) {
Deflater compressor = new Deflater(Deflater.BEST_SPEED);
try {
compressor.setInput(data);
compressor.finish();
- byte[] buf = new byte[data.length + 8];
+ byte[] buf = new byte[Math.max(initialBufferCapacity, 16)];
int compressedSize = 0;
while (!compressor.finished()) {
if (compressedSize >= buf.length) {
diff --git a/solr/solrj/src/test/org/apache/solr/common/util/ZLibCompressorTest.java b/solr/solrj/src/test/org/apache/solr/common/util/ZLibCompressorTest.java
index be2d2975c0d..d4b2dc6d53f 100644
--- a/solr/solrj/src/test/org/apache/solr/common/util/ZLibCompressorTest.java
+++ b/solr/solrj/src/test/org/apache/solr/common/util/ZLibCompressorTest.java
@@ -65,12 +65,15 @@ public class ZLibCompressorTest extends SolrTestCase {
120, 1, 11, -50, -49, 77, 85, 40, 73, 45, 46, 81, 72, 73, 44, 73, -28, 2, 0, 43, -36, 5,
57
};
+ byte[] data = "Some test data\n".getBytes(StandardCharsets.UTF_8);
byte[] compressedBytes =
- stateCompression.compressBytes("Some test data\n".getBytes(StandardCharsets.UTF_8));
+ random().nextBoolean()
+ ? stateCompression.compressBytes(data)
+ : stateCompression.compressBytes(data, data.length / 10);
int decompressedSize = ByteBuffer.wrap(compressedBytes, compressedBytes.length - 8, 4).getInt();
int xoredSize = ByteBuffer.wrap(compressedBytes, compressedBytes.length - 4, 4).getInt();
assertEquals(xoredSize, decompressedSize ^ 2018370979);
- assertEquals("Some test data\n".getBytes(StandardCharsets.UTF_8).length, decompressedSize);
+ assertEquals(data.length, decompressedSize);
assertArrayEquals(
testBytes, ArrayUtil.copyOfSubArray(compressedBytes, 0, compressedBytes.length - 8));
}