You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ah...@apache.org on 2019/12/02 21:14:15 UTC

[commons-codec] 01/03: [CODEC-265] BaseNCodec to expand buffer using overflow conscious code.

This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git

commit 0956493bcb3fe40ca37d66768ec7817f8a1f410b
Author: aherbert <ah...@apache.org>
AuthorDate: Tue Nov 26 12:25:12 2019 +0000

    [CODEC-265] BaseNCodec to expand buffer using overflow conscious code.
    
    Add a test to encode a 1 Gigabyte byte[].
    
    Added overflow conscious code for buffer expansion.
    
    This is based on java.util.ArrayList but modified to use a local copy of
    JDK 1.8 Integer.compareUnsigned(int, int).
    
    Unlike the java.util.ArrayList if the minCapcity exceeds the head room
    for the maximum array size, the capacity returned is the required
    minCapcity and not Integer.MAX_VALUE.
---
 .../apache/commons/codec/binary/BaseNCodec.java    |  94 +++++++++--
 .../apache/commons/codec/binary/Base64Test.java    |  36 ++++-
 .../commons/codec/binary/BaseNCodecTest.java       | 173 +++++++++++++++++++++
 3 files changed, 290 insertions(+), 13 deletions(-)

diff --git a/src/main/java/org/apache/commons/codec/binary/BaseNCodec.java b/src/main/java/org/apache/commons/codec/binary/BaseNCodec.java
index fb254e3..3b40ad3 100644
--- a/src/main/java/org/apache/commons/codec/binary/BaseNCodec.java
+++ b/src/main/java/org/apache/commons/codec/binary/BaseNCodec.java
@@ -144,6 +144,18 @@ public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
      */
     private static final int DEFAULT_BUFFER_SIZE = 8192;
 
+    /**
+     * The maximum size buffer to allocate.
+     *
+     * <p>This is set to the same size used in the JDK {@code java.util.ArrayList}:</p>
+     * <blockquote>
+     * Some VMs reserve some header words in an array.
+     * Attempts to allocate larger arrays may result in
+     * OutOfMemoryError: Requested array size exceeds VM limit.
+     * </blockquote>
+     */
+    private static final int MAX_BUFFER_SIZE = Integer.MAX_VALUE - 8;
+
     /** Mask used to extract 8 bits, used in decoding bytes */
     protected static final int MASK_8BITS = 0xff;
 
@@ -243,18 +255,69 @@ public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
     /**
      * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}.
      * @param context the context to be used
+     * @param minCapacity the minimum required capacity
+     * @return the resized byte[] buffer
+     * @throws OutOfMemoryError if the {@code minCapacity} is negative
      */
-    private byte[] resizeBuffer(final Context context) {
-        if (context.buffer == null) {
-            context.buffer = new byte[getDefaultBufferSize()];
-            context.pos = 0;
-            context.readPos = 0;
-        } else {
-            final byte[] b = new byte[context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR];
-            System.arraycopy(context.buffer, 0, b, 0, context.buffer.length);
-            context.buffer = b;
+    private static byte[] resizeBuffer(final Context context, final int minCapacity) {
+        // Overflow-conscious code treats the min and new capacity as unsigned.
+        final int oldCapacity = context.buffer.length;
+        int newCapacity = oldCapacity * DEFAULT_BUFFER_RESIZE_FACTOR;
+        if (compareUnsigned(newCapacity, minCapacity) < 0) {
+            newCapacity = minCapacity;
         }
-        return context.buffer;
+        if (compareUnsigned(newCapacity, MAX_BUFFER_SIZE) > 0) {
+            newCapacity = createPositiveCapacity(minCapacity);
+        }
+
+        final byte[] b = new byte[newCapacity];
+        System.arraycopy(context.buffer, 0, b, 0, context.buffer.length);
+        context.buffer = b;
+        return b;
+    }
+
+    /**
+     * Compares two {@code int} values numerically treating the values
+     * as unsigned. Taken from JDK 1.8.
+     *
+     * <p>TODO: Replace with JDK 1.8 Integer::compareUnsigned(int, int).</p>
+     *
+     * @param  x the first {@code int} to compare
+     * @param  y the second {@code int} to compare
+     * @return the value {@code 0} if {@code x == y}; a value less
+     *         than {@code 0} if {@code x < y} as unsigned values; and
+     *         a value greater than {@code 0} if {@code x > y} as
+     *         unsigned values
+     */
+    private static int compareUnsigned(int x, int y) {
+        return Integer.compare(x + Integer.MIN_VALUE, y + Integer.MIN_VALUE);
+    }
+
+    /**
+     * Create a positive capacity at least as large the minimum required capacity.
+     * If the minimum capacity is negative then this throws an OutOfMemoryError as no array
+     * can be allocated.
+     *
+     * @param minCapacity the minimum capacity
+     * @return the capacity
+     * @throws OutOfMemoryError if the {@code minCapacity} is negative
+     */
+    private static int createPositiveCapacity(int minCapacity) {
+        if (minCapacity < 0) {
+            // overflow
+            throw new OutOfMemoryError("Unable to allocate array size: " + (minCapacity & 0xffffffffL));
+        }
+        // This is called when we require buffer expansion to a very big array.
+        // Use the conservative maximum buffer size if possible, otherwise the biggest required.
+        //
+        // Note: In this situation JDK 1.8 java.util.ArrayList returns Integer.MAX_VALUE.
+        // This excludes some VMs that can exceed MAX_BUFFER_SIZE but not allocate a full
+        // Integer.MAX_VALUE length array.
+        // The result is that we may have to allocate an array of this size more than once if
+        // the capacity must be expanded again.
+        return (minCapacity > MAX_BUFFER_SIZE) ?
+            minCapacity :
+            MAX_BUFFER_SIZE;
     }
 
     /**
@@ -265,8 +328,15 @@ public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
      * @return the buffer
      */
     protected byte[] ensureBufferSize(final int size, final Context context){
-        if ((context.buffer == null) || (context.buffer.length < context.pos + size)){
-            return resizeBuffer(context);
+        if (context.buffer == null) {
+            context.buffer = new byte[getDefaultBufferSize()];
+            context.pos = 0;
+            context.readPos = 0;
+
+            // Overflow-conscious:
+            // x + y > z  ==  x + y - z > 0
+        } else if (context.pos + size - context.buffer.length > 0) {
+            return resizeBuffer(context, context.pos + size);
         }
         return context.buffer;
     }
diff --git a/src/test/java/org/apache/commons/codec/binary/Base64Test.java b/src/test/java/org/apache/commons/codec/binary/Base64Test.java
index cf73709..1034d12 100644
--- a/src/test/java/org/apache/commons/codec/binary/Base64Test.java
+++ b/src/test/java/org/apache/commons/codec/binary/Base64Test.java
@@ -32,7 +32,7 @@ import org.apache.commons.codec.Charsets;
 import org.apache.commons.codec.DecoderException;
 import org.apache.commons.codec.EncoderException;
 import org.apache.commons.lang3.ArrayUtils;
-import org.junit.Ignore;
+import org.junit.Assume;
 import org.junit.Test;
 
 /**
@@ -1378,4 +1378,38 @@ public class Base64Test {
             }
         }
     }
+
+    /**
+     * Test for CODEC-265: Encode a 1GiB file.
+     *
+     * @see <a href="https://issues.apache.org/jira/projects/CODEC/issues/CODEC-265">CODEC-265</a>
+     */
+    @Test
+    public void testCodec265() {
+        // 1GiB file to encode: 2^30 bytes
+        final int size1GiB = 1 << 30;
+
+        // Expecting a size of 4 output bytes per 3 input bytes plus the trailing bytes
+        // padded to a block size of 4.
+        int blocks = (int) Math.ceil(size1GiB / 3.0);
+        final int expectedLength = 4 * blocks;
+
+        // This test is memory hungry. Check we can run it.
+        final long presumableFreeMemory = BaseNCodecTest.getPresumableFreeMemory();
+
+        // Estimate the maximum memory required:
+        // 1GiB + 1GiB + ~2GiB + ~1.33GiB + 32 KiB  = ~5.33GiB
+        //
+        // 1GiB: Input buffer to encode
+        // 1GiB: Existing working buffer (due to doubling of default buffer size of 8192)
+        // ~2GiB: New working buffer to allocate (due to doubling)
+        // ~1.33GiB: Expected output size (since the working buffer is copied at the end) 
+        // 32KiB: Some head room
+        final long estimatedMemory = (long) size1GiB * 4 + expectedLength + 32 * 1024;
+        Assume.assumeTrue("Not enough free memory for the test", presumableFreeMemory > estimatedMemory);
+
+        final byte[] bytes = new byte[size1GiB];
+        final byte[] encoded = Base64.encodeBase64(bytes);
+        assertEquals(expectedLength, encoded.length);
+    }
 }
diff --git a/src/test/java/org/apache/commons/codec/binary/BaseNCodecTest.java b/src/test/java/org/apache/commons/codec/binary/BaseNCodecTest.java
index 183d3b0..3e8d6dc 100644
--- a/src/test/java/org/apache/commons/codec/binary/BaseNCodecTest.java
+++ b/src/test/java/org/apache/commons/codec/binary/BaseNCodecTest.java
@@ -23,6 +23,9 @@ import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 
+import org.apache.commons.codec.binary.BaseNCodec.Context;
+import org.junit.Assert;
+import org.junit.Assume;
 import org.junit.Before;
 import org.junit.Test;
 
@@ -191,4 +194,174 @@ public class BaseNCodecTest {
         // Then
         assertEquals(0x25, actualPaddingByte);
     }
+
+    @Test
+    public void testEnsureBufferSize() {
+        final BaseNCodec ncodec = new NoOpBaseNCodec();
+        final Context context = new Context();
+        Assert.assertNull("Initial buffer should be null", context.buffer);
+
+        // Test initialisation
+        context.pos = 76979;
+        context.readPos = 273;
+        ncodec.ensureBufferSize(0, context);
+        Assert.assertNotNull("buffer should be initialised", context.buffer);
+        Assert.assertEquals("buffer should be initialised to default size", ncodec.getDefaultBufferSize(), context.buffer.length);
+        Assert.assertEquals("context position", 0, context.pos);
+        Assert.assertEquals("context read position", 0, context.readPos);
+
+        // Test when no expansion is required
+        ncodec.ensureBufferSize(1, context);
+        Assert.assertEquals("buffer should not expand unless required", ncodec.getDefaultBufferSize(), context.buffer.length);
+
+        // Test expansion
+        int length = context.buffer.length;
+        context.pos = length;
+        int extra = 1;
+        ncodec.ensureBufferSize(extra, context);
+        Assert.assertTrue("buffer should expand", context.buffer.length >= length + extra);
+
+        // Test expansion beyond double the buffer size.
+        // Hits the edge case where the required capacity is more than the default expansion.
+        length = context.buffer.length;
+        context.pos = length;
+        extra = length * 10;
+        ncodec.ensureBufferSize(extra, context);
+        Assert.assertTrue("buffer should expand beyond double capacity", context.buffer.length >= length + extra);
+    }
+
+    /**
+     * Test to expand to the max buffer size.
+     */
+    @Test
+    public void testEnsureBufferSizeExpandsToMaxBufferSize() {
+        assertEnsureBufferSizeExpandsToMaxBufferSize(false);
+    }
+
+    /**
+     * Test to expand to beyond the max buffer size.
+     *
+     * <p>Note: If the buffer is required to expand to above the max buffer size it may not work
+     * on all VMs and may have to be annotated with @Ignore.</p>
+     */
+    @Test
+    public void testEnsureBufferSizeExpandsToBeyondMaxBufferSize() {
+        assertEnsureBufferSizeExpandsToMaxBufferSize(true);
+    }
+
+    private static void assertEnsureBufferSizeExpandsToMaxBufferSize(boolean exceedMaxBufferSize) {
+        // This test is memory hungry.
+        // By default expansion will double the buffer size.
+        // Using a buffer that must be doubled to get close to 2GiB requires at least 3GiB
+        // of memory for the test (1GiB existing + 2GiB new).
+        // As a compromise we use an empty buffer and rely on the expansion switching
+        // to the minimum required capacity if doubling is not enough.
+
+        // To effectively use a full buffer of ~1GiB change the following for: 1 << 30.
+        // Setting to zero has the lowest memory footprint for this test.
+        final int length = 0;
+
+        final long presumableFreeMemory = getPresumableFreeMemory();
+        // 2GiB + 32 KiB + length
+        // 2GiB: Buffer to allocate
+        // 32KiB: Some head room
+        // length: Existing buffer
+        final long estimatedMemory = (1L << 31) + 32 * 1024 + length;
+        Assume.assumeTrue("Not enough free memory for the test", presumableFreeMemory > estimatedMemory);
+
+        final int max = Integer.MAX_VALUE - 8;
+
+        // Check the conservative maximum buffer size can actually be exceeded by the VM
+        // otherwise the test is not valid.
+        if (exceedMaxBufferSize) {
+            assumeCanAllocateBufferSize(max + 1);
+            // Free-memory.
+            // This may not be necessary as the byte[] is now out of scope
+            System.gc();
+        }
+
+        final BaseNCodec ncodec = new NoOpBaseNCodec();
+        final Context context = new Context();
+
+        // Allocate the initial buffer
+        context.buffer = new byte[length];
+        context.pos = length;
+        // Compute the extra to reach or exceed the max buffer size
+        int extra = max - length;
+        if (exceedMaxBufferSize) {
+            extra++;
+        }
+        ncodec.ensureBufferSize(extra, context);
+        Assert.assertTrue(context.buffer.length >= length + extra);
+    }
+
+    /**
+     * Verify this VM can allocate the given size byte array. Otherwise skip the test.
+     */
+    private static void assumeCanAllocateBufferSize(int size) {
+        byte[] bytes = null;
+        try {
+            bytes = new byte[size];
+        } catch (OutOfMemoryError ignore) {
+            // ignore
+        }
+        Assume.assumeTrue("Cannot allocate array of size: " + size, bytes != null);
+    }
+
+    /**
+     * Gets the presumable free memory; an estimate of the amount of memory that could be allocated.
+     *
+     * <p>This performs a garbage clean-up and the obtains the presumed amount of free memory
+     * that can be allocated in this VM. This is computed as:<p>
+     *
+     * <pre>
+     * System.gc();
+     * long allocatedMemory = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
+     * long presumableFreeMemory = Runtime.getRuntime().maxMemory() - allocatedMemory;
+     * </pre>
+     *
+     * @return the presumable free memory
+     * @see <a href="https://stackoverflow.com/a/18366283">
+     *     Christian Fries StackOverflow answer on Java available memory</a>
+     */
+    static long getPresumableFreeMemory() {
+        System.gc();
+        final long allocatedMemory = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
+        return Runtime.getRuntime().maxMemory() - allocatedMemory;
+    }
+
+    @Test(expected = OutOfMemoryError.class)
+    public void testEnsureBufferSizeThrowsOnOverflow() {
+        final BaseNCodec ncodec = new NoOpBaseNCodec();
+        final Context context = new Context();
+
+        final int length = 10;
+        context.buffer = new byte[length];
+        context.pos = length;
+        final int extra = Integer.MAX_VALUE;
+        ncodec.ensureBufferSize(extra, context);
+    }
+
+    /**
+     * Extend BaseNCodec without implementation (no operations = NoOp).
+     * Used for testing the memory allocation in {@link BaseNCodec#ensureBufferSize(int, Context)}.
+     */
+    private static class NoOpBaseNCodec extends BaseNCodec {
+        NoOpBaseNCodec() {
+            super(0, 0, 0, 0);
+        }
+
+        @Override
+        void encode(byte[] pArray, int i, int length, Context context) {
+        }
+
+        @Override
+        void decode(byte[] pArray, int i, int length, Context context) {
+        }
+
+        @Override
+        protected boolean isInAlphabet(byte value) {
+            return false;
+        }
+    }
 }