You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ah...@apache.org on 2021/05/21 20:22:21 UTC

[commons-codec] branch master updated (a66b760 -> d7cc6db)

This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git.


    from a66b760  Add GitHub code of conduct page that points to the Apache page.
     new 6b67d6f  CODEC-301: Reduce byte[] allocations by reusing buffers
     new d7cc6db  Track changes

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/changes/changes.xml                            |  1 +
 .../apache/commons/codec/binary/BaseNCodec.java    | 14 ++++++++-----
 .../codec/binary/BaseNCodecInputStream.java        | 16 ++++++++++++---
 .../codec/binary/Base64InputStreamTest.java        | 24 ++++++++++++++++++++++
 4 files changed, 47 insertions(+), 8 deletions(-)

[commons-codec] 02/02: Track changes

Posted by ah...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git

commit d7cc6db2aceb98dac88f47b5057eebcf7a1c6fa5
Author: Alex Herbert <ah...@apache.org>
AuthorDate: Fri May 21 21:22:17 2021 +0100

    Track changes
---
 src/changes/changes.xml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 723de88..2677773 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -53,6 +53,7 @@ The <action> type attribute can be add,update,fix,remove.
       <action                   dev="ggregory" type="update" due-to="Gary Gregory">Update commons.japicmp.version 0.14.3 -> 0.15.2.</action>
       <action                   dev="ggregory" type="update" due-to="Dependabot">Bump actions/cache from v2 to v2.1.4 #75.</action>
       <action                   dev="ggregory" type="update" due-to="Dependabot">Bump junit from 4.13.1 to 4.13.2 #76.</action>
+      <action issue="CODEC-301" dev="aherbert" type="update" due-to="Alexander Pinske, Alex Herbert">BaseNCodecInputStream: Reduce byte[] allocations by reusing buffers.</action> 
     </release>
 
     <release version="1.15" date="2020-09-01" description="Feature and fix release.">

[commons-codec] 01/02: CODEC-301: Reduce byte[] allocations by reusing buffers

Posted by ah...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git

commit 6b67d6f093a821ede0b393f260b407d035289e07
Author: Alexander Pinske <al...@pinske.eu>
AuthorDate: Mon May 10 21:36:22 2021 +0200

    CODEC-301: Reduce byte[] allocations by reusing buffers
    
    * Reduces byte[] allocations from 280MB to <4MB when reading a 133MB base64 stream. Messured with JFR.
    * Keep reusing inital buffer when decoding BaseN
    * Attempt to fill up the user-provided buffer
        Previously we only filled up to a maximum of 8KB - encoding-overhead (e.g. 6KB for Base64) even if the provided
        buffer was bigger.
    * Reuse hasData method for checking pos/readPos markers
---
 .../apache/commons/codec/binary/BaseNCodec.java    | 14 ++++++++-----
 .../codec/binary/BaseNCodecInputStream.java        | 16 ++++++++++++---
 .../codec/binary/Base64InputStreamTest.java        | 24 ++++++++++++++++++++++
 3 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/src/main/java/org/apache/commons/codec/binary/BaseNCodec.java b/src/main/java/org/apache/commons/codec/binary/BaseNCodec.java
index b054253..b22f2a4 100644
--- a/src/main/java/org/apache/commons/codec/binary/BaseNCodec.java
+++ b/src/main/java/org/apache/commons/codec/binary/BaseNCodec.java
@@ -394,7 +394,7 @@ public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
      * @return The amount of buffered data available for reading.
      */
     int available(final Context context) {  // package protected for access from I/O streams
-        return context.buffer != null ? context.pos - context.readPos : 0;
+        return hasData(context) ? context.pos - context.readPos : 0;
     }
 
     /**
@@ -632,7 +632,7 @@ public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
      * @return true if there is data still available for reading.
      */
     boolean hasData(final Context context) {  // package protected for access from I/O streams
-        return context.buffer != null;
+        return context.pos > context.readPos;
     }
 
     /**
@@ -711,12 +711,16 @@ public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
      * @return The number of bytes successfully extracted into the provided byte[] array.
      */
     int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) {
-        if (context.buffer != null) {
+        if (hasData(context)) {
             final int len = Math.min(available(context), bAvail);
             System.arraycopy(context.buffer, context.readPos, b, bPos, len);
             context.readPos += len;
-            if (context.readPos >= context.pos) {
-                context.buffer = null; // so hasData() will return false, and this method can return -1
+            if (!hasData(context)) {
+                // All data read.
+                // Reset position markers but do not set buffer to null to allow its reuse.
+                // hasData(context) will still return false, and this method will return 0 until
+                // more data is available, or -1 if EOF.
+                context.pos = context.readPos = 0;
             }
             return len;
         }
diff --git a/src/main/java/org/apache/commons/codec/binary/BaseNCodecInputStream.java b/src/main/java/org/apache/commons/codec/binary/BaseNCodecInputStream.java
index f225aab..29c6259 100644
--- a/src/main/java/org/apache/commons/codec/binary/BaseNCodecInputStream.java
+++ b/src/main/java/org/apache/commons/codec/binary/BaseNCodecInputStream.java
@@ -39,12 +39,15 @@ public class BaseNCodecInputStream extends FilterInputStream {
 
     private final byte[] singleByte = new byte[1];
 
+    private final byte[] buf;
+
     private final Context context = new Context();
 
     protected BaseNCodecInputStream(final InputStream input, final BaseNCodec baseNCodec, final boolean doEncode) {
         super(input);
         this.doEncode = doEncode;
         this.baseNCodec = baseNCodec;
+        this.buf = new byte[doEncode ? 4096 : 8192];
     }
 
     /**
@@ -169,9 +172,11 @@ public class BaseNCodecInputStream extends FilterInputStream {
          -----
          This is a fix for CODEC-101
         */
-        while (readLen == 0) {
+        // Attempt to read the request length
+        while (readLen < len) {
             if (!baseNCodec.hasData(context)) {
-                final byte[] buf = new byte[doEncode ? 4096 : 8192];
+                // Obtain more data.
+                // buf is reused across calls to read to avoid repeated allocations
                 final int c = in.read(buf);
                 if (doEncode) {
                     baseNCodec.encode(buf, 0, c, context);
@@ -179,7 +184,12 @@ public class BaseNCodecInputStream extends FilterInputStream {
                     baseNCodec.decode(buf, 0, c, context);
                 }
             }
-            readLen = baseNCodec.readResults(array, offset, len, context);
+            final int read = baseNCodec.readResults(array, offset + readLen, len - readLen, context);
+            if (read < 0) {
+                // Return the amount read or EOF
+                return readLen != 0 ? readLen : -1;
+            }
+            readLen += read;
         }
         return readLen;
     }
diff --git a/src/test/java/org/apache/commons/codec/binary/Base64InputStreamTest.java b/src/test/java/org/apache/commons/codec/binary/Base64InputStreamTest.java
index 93172b9..cdbfc7c 100644
--- a/src/test/java/org/apache/commons/codec/binary/Base64InputStreamTest.java
+++ b/src/test/java/org/apache/commons/codec/binary/Base64InputStreamTest.java
@@ -409,6 +409,30 @@ public class Base64InputStreamTest {
     }
 
     /**
+     * Tests read using different buffer sizes
+     *
+     * @throws Exception
+     *             for some failure scenarios.
+     */
+    @Test
+    public void testReadMultipleBufferSizes() throws Exception {
+        final byte[][] randomData = BaseNTestData.randomData(new Base64(0, null, false), 1024 * 64);
+        byte[] encoded = randomData[1];
+        byte[] decoded = randomData[0];
+        final ByteArrayInputStream bin = new ByteArrayInputStream(encoded);
+        final ByteArrayOutputStream out = new ByteArrayOutputStream();
+        try (final Base64InputStream in = new Base64InputStream(bin)) {
+            for (int i : new int[] { 4 * 1024, 4 * 1024, 8 * 1024, 8 * 1024, 16 * 1024, 16 * 1024, 8 * 1024 }) {
+                final byte[] buf = new byte[i];
+                int bytesRead = in.read(buf);
+                assertEquals(i, bytesRead);
+                out.write(buf, 0, bytesRead);
+            }
+        }
+        assertArrayEquals(decoded, out.toByteArray());
+    }
+
+    /**
      * Tests read returning 0
      *
      * @throws Exception