You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ah...@apache.org on 2021/05/21 20:22:22 UTC

[commons-codec] 01/02: CODEC-301: Reduce byte[] allocations by reusing buffers

This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git

commit 6b67d6f093a821ede0b393f260b407d035289e07
Author: Alexander Pinske <al...@pinske.eu>
AuthorDate: Mon May 10 21:36:22 2021 +0200

    CODEC-301: Reduce byte[] allocations by reusing buffers
    
    * Reduces byte[] allocations from 280MB to <4MB when reading a 133MB base64 stream. Messured with JFR.
    * Keep reusing inital buffer when decoding BaseN
    * Attempt to fill up the user-provided buffer
        Previously we only filled up to a maximum of 8KB - encoding-overhead (e.g. 6KB for Base64) even if the provided
        buffer was bigger.
    * Reuse hasData method for checking pos/readPos markers
---
 .../apache/commons/codec/binary/BaseNCodec.java    | 14 ++++++++-----
 .../codec/binary/BaseNCodecInputStream.java        | 16 ++++++++++++---
 .../codec/binary/Base64InputStreamTest.java        | 24 ++++++++++++++++++++++
 3 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/src/main/java/org/apache/commons/codec/binary/BaseNCodec.java b/src/main/java/org/apache/commons/codec/binary/BaseNCodec.java
index b054253..b22f2a4 100644
--- a/src/main/java/org/apache/commons/codec/binary/BaseNCodec.java
+++ b/src/main/java/org/apache/commons/codec/binary/BaseNCodec.java
@@ -394,7 +394,7 @@ public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
      * @return The amount of buffered data available for reading.
      */
     int available(final Context context) {  // package protected for access from I/O streams
-        return context.buffer != null ? context.pos - context.readPos : 0;
+        return hasData(context) ? context.pos - context.readPos : 0;
     }
 
     /**
@@ -632,7 +632,7 @@ public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
      * @return true if there is data still available for reading.
      */
     boolean hasData(final Context context) {  // package protected for access from I/O streams
-        return context.buffer != null;
+        return context.pos > context.readPos;
     }
 
     /**
@@ -711,12 +711,16 @@ public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
      * @return The number of bytes successfully extracted into the provided byte[] array.
      */
     int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) {
-        if (context.buffer != null) {
+        if (hasData(context)) {
             final int len = Math.min(available(context), bAvail);
             System.arraycopy(context.buffer, context.readPos, b, bPos, len);
             context.readPos += len;
-            if (context.readPos >= context.pos) {
-                context.buffer = null; // so hasData() will return false, and this method can return -1
+            if (!hasData(context)) {
+                // All data read.
+                // Reset position markers but do not set buffer to null to allow its reuse.
+                // hasData(context) will still return false, and this method will return 0 until
+                // more data is available, or -1 if EOF.
+                context.pos = context.readPos = 0;
             }
             return len;
         }
diff --git a/src/main/java/org/apache/commons/codec/binary/BaseNCodecInputStream.java b/src/main/java/org/apache/commons/codec/binary/BaseNCodecInputStream.java
index f225aab..29c6259 100644
--- a/src/main/java/org/apache/commons/codec/binary/BaseNCodecInputStream.java
+++ b/src/main/java/org/apache/commons/codec/binary/BaseNCodecInputStream.java
@@ -39,12 +39,15 @@ public class BaseNCodecInputStream extends FilterInputStream {
 
     private final byte[] singleByte = new byte[1];
 
+    private final byte[] buf;
+
     private final Context context = new Context();
 
     protected BaseNCodecInputStream(final InputStream input, final BaseNCodec baseNCodec, final boolean doEncode) {
         super(input);
         this.doEncode = doEncode;
         this.baseNCodec = baseNCodec;
+        this.buf = new byte[doEncode ? 4096 : 8192];
     }
 
     /**
@@ -169,9 +172,11 @@ public class BaseNCodecInputStream extends FilterInputStream {
          -----
          This is a fix for CODEC-101
         */
-        while (readLen == 0) {
+        // Attempt to read the request length
+        while (readLen < len) {
             if (!baseNCodec.hasData(context)) {
-                final byte[] buf = new byte[doEncode ? 4096 : 8192];
+                // Obtain more data.
+                // buf is reused across calls to read to avoid repeated allocations
                 final int c = in.read(buf);
                 if (doEncode) {
                     baseNCodec.encode(buf, 0, c, context);
@@ -179,7 +184,12 @@ public class BaseNCodecInputStream extends FilterInputStream {
                     baseNCodec.decode(buf, 0, c, context);
                 }
             }
-            readLen = baseNCodec.readResults(array, offset, len, context);
+            final int read = baseNCodec.readResults(array, offset + readLen, len - readLen, context);
+            if (read < 0) {
+                // Return the amount read or EOF
+                return readLen != 0 ? readLen : -1;
+            }
+            readLen += read;
         }
         return readLen;
     }
diff --git a/src/test/java/org/apache/commons/codec/binary/Base64InputStreamTest.java b/src/test/java/org/apache/commons/codec/binary/Base64InputStreamTest.java
index 93172b9..cdbfc7c 100644
--- a/src/test/java/org/apache/commons/codec/binary/Base64InputStreamTest.java
+++ b/src/test/java/org/apache/commons/codec/binary/Base64InputStreamTest.java
@@ -409,6 +409,30 @@ public class Base64InputStreamTest {
     }
 
     /**
+     * Tests read using different buffer sizes
+     *
+     * @throws Exception
+     *             for some failure scenarios.
+     */
+    @Test
+    public void testReadMultipleBufferSizes() throws Exception {
+        final byte[][] randomData = BaseNTestData.randomData(new Base64(0, null, false), 1024 * 64);
+        byte[] encoded = randomData[1];
+        byte[] decoded = randomData[0];
+        final ByteArrayInputStream bin = new ByteArrayInputStream(encoded);
+        final ByteArrayOutputStream out = new ByteArrayOutputStream();
+        try (final Base64InputStream in = new Base64InputStream(bin)) {
+            for (int i : new int[] { 4 * 1024, 4 * 1024, 8 * 1024, 8 * 1024, 16 * 1024, 16 * 1024, 8 * 1024 }) {
+                final byte[] buf = new byte[i];
+                int bytesRead = in.read(buf);
+                assertEquals(i, bytesRead);
+                out.write(buf, 0, bytesRead);
+            }
+        }
+        assertArrayEquals(decoded, out.toByteArray());
+    }
+
+    /**
      * Tests read returning 0
      *
      * @throws Exception