You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by gg...@apache.org on 2008/02/04 21:20:42 UTC

svn commit: r618419 - /commons/proper/codec/trunk/src/java/org/apache/commons/codec/binary/Base64.java

Author: ggregory
Date: Mon Feb  4 12:20:41 2008
New Revision: 618419

URL: http://svn.apache.org/viewvc?rev=618419&view=rev
Log:
[CODEC-61] Base64.EncodeBase64() throws NegativeArraySizeException on large files.
Throws IllegalArgumentException when the input array needs an output array bigger than Integer.MAX_VALUE.

Modified:
    commons/proper/codec/trunk/src/java/org/apache/commons/codec/binary/Base64.java

Modified: commons/proper/codec/trunk/src/java/org/apache/commons/codec/binary/Base64.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/java/org/apache/commons/codec/binary/Base64.java?rev=618419&r1=618418&r2=618419&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/java/org/apache/commons/codec/binary/Base64.java (original)
+++ commons/proper/codec/trunk/src/java/org/apache/commons/codec/binary/Base64.java Mon Feb  4 12:20:41 2008
@@ -13,7 +13,7 @@
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
- */ 
+ */
 
 package org.apache.commons.codec.binary;
 
@@ -25,10 +25,11 @@
 /**
  * Provides Base64 encoding and decoding as defined by RFC 2045.
  * 
- * <p>This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> 
- * from RFC 2045 <cite>Multipurpose Internet Mail Extensions (MIME) Part One: 
- * Format of Internet Message Bodies</cite> by Freed and Borenstein.</p> 
- *
+ * <p>
+ * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose
+ * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
+ * </p>
+ * 
  * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
  * @author Apache Software Foundation
  * @since 1.0-dev
@@ -39,8 +40,10 @@
     /**
      * Chunk size per RFC 2045 section 6.8.
      * 
-     * <p>The {@value} character limit does not count the trailing CRLF, but counts 
-     * all other characters, including any equal signs.</p>
+     * <p>
+     * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
+     * equal signs.
+     * </p>
      * 
      * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
      */
@@ -87,7 +90,7 @@
      * Used to test the sign of a byte.
      */
     private static final int SIGN = -128;
-    
+
     /**
      * Byte used to pad output.
      */
@@ -104,7 +107,7 @@
      * </p>
      */
     private static final byte[] base64Alphabet = new byte[BASELENGTH];
-    
+
     /**
      * <p>
      * Contains the Base64 encodings <code>A</code> through <code>Z</code>, followed by <code>a</code> through
@@ -157,7 +160,8 @@
     /**
      * Returns whether or not the <code>octect</code> is in the base 64 alphabet.
      * 
-     * @param octect The value to test
+     * @param octect
+     *            The value to test
      * @return <code>true</code> if the value is defined in the the base 64 alphabet, <code>false</code> otherwise.
      */
     private static boolean isBase64(byte octect) {
@@ -171,12 +175,12 @@
     }
 
     /**
-     * Tests a given byte array to see if it contains
-     * only valid characters within the Base64 alphabet.
-     *
-     * @param arrayOctect byte array to test
-     * @return <code>true</code> if all bytes are valid characters in the Base64
-     *         alphabet or if the byte array is empty; false, otherwise
+     * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet.
+     * 
+     * @param arrayOctect
+     *            byte array to test
+     * @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is
+     *         empty; false, otherwise
      */
     public static boolean isArrayByteBase64(byte[] arrayOctect) {
 
@@ -197,10 +201,10 @@
     }
 
     /**
-     * Encodes binary data using the base64 algorithm but
-     * does not chunk the output.
-     *
-     * @param binaryData binary data to encode
+     * Encodes binary data using the base64 algorithm but does not chunk the output.
+     * 
+     * @param binaryData
+     *            binary data to encode
      * @return Base64 characters
      */
     public static byte[] encodeBase64(byte[] binaryData) {
@@ -208,28 +212,25 @@
     }
 
     /**
-     * Encodes binary data using the base64 algorithm and chunks
-     * the encoded output into 76 character blocks
-     *
-     * @param binaryData binary data to encode
+     * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks.
+     * 
+     * @param binaryData
+     *            binary data to encode
      * @return Base64 characters chunked in 76 character blocks
      */
     public static byte[] encodeBase64Chunked(byte[] binaryData) {
         return encodeBase64(binaryData, true);
     }
 
-
     /**
-     * Decodes an Object using the base64 algorithm.  This method
-     * is provided in order to satisfy the requirements of the
-     * Decoder interface, and will throw a DecoderException if the
-     * supplied object is not of type byte[].
-     *
-     * @param pObject Object to decode
-     * @return An object (of type byte[]) containing the 
-     *         binary data which corresponds to the byte[] supplied.
-     * @throws DecoderException if the parameter supplied is not
-     *                          of type byte[]
+     * Decodes an Object using the base64 algorithm. This method is provided in order to satisfy the requirements of the
+     * Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[].
+     * 
+     * @param pObject
+     *            Object to decode
+     * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] supplied.
+     * @throws DecoderException
+     *             if the parameter supplied is not of type byte[]
      */
     public Object decode(Object pObject) throws DecoderException {
         if (!(pObject instanceof byte[])) {
@@ -239,10 +240,10 @@
     }
 
     /**
-     * Decodes a byte[] containing containing
-     * characters in the Base64 alphabet.
-     *
-     * @param pArray A byte array containing Base64 character data
+     * Decodes a byte[] containing characters in the Base64 alphabet.
+     * 
+     * @param pArray
+     *            A byte array containing Base64 character data
      * @return a byte array containing binary data
      */
     public byte[] decode(byte[] pArray) {
@@ -250,41 +251,48 @@
     }
 
     /**
-     * Encodes binary data using the base64 algorithm, optionally
-     * chunking the output into 76 character blocks.
-     *
-     * @param binaryData Array containing binary data to encode.
-     * @param isChunked if <code>true</code> this encoder will chunk
-     *                  the base64 output into 76 character blocks
+     * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
+     * 
+     * @param binaryData
+     *            Array containing binary data to encode.
+     * @param isChunked
+     *            if <code>true</code> this encoder will chunk the base64 output into 76 character blocks
      * @return Base64-encoded data.
+     * @throws IllegalArgumentException
+     *             Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}
      */
     public static byte[] encodeBase64(byte[] binaryData, boolean isChunked) {
-        int lengthDataBits = binaryData.length * EIGHTBIT;
-        int fewerThan24bits = lengthDataBits % TWENTYFOURBITGROUP;
-        int numberTriplets = lengthDataBits / TWENTYFOURBITGROUP;
-        byte encodedData[] = null;
-        int encodedDataLength = 0;
-        int nbrChunks = 0;
+        long binaryDataLength = binaryData.length;
+        long lengthDataBits = binaryDataLength * EIGHTBIT;
+        long fewerThan24bits = lengthDataBits % TWENTYFOURBITGROUP;
+        long tripletCount = lengthDataBits / TWENTYFOURBITGROUP;
+        long encodedDataLengthLong = 0;
+        int chunckCount = 0;
 
         if (fewerThan24bits != 0) {
-            //data not divisible by 24 bit
-            encodedDataLength = (numberTriplets + 1) * 4;
+            // data not divisible by 24 bit
+            encodedDataLengthLong = (tripletCount + 1) * 4;
         } else {
             // 16 or 8 bit
-            encodedDataLength = numberTriplets * 4;
+            encodedDataLengthLong = tripletCount * 4;
         }
 
-        // If the output is to be "chunked" into 76 character sections, 
-        // for compliance with RFC 2045 MIME, then it is important to 
+        // If the output is to be "chunked" into 76 character sections,
+        // for compliance with RFC 2045 MIME, then it is important to
         // allow for extra length to account for the separator(s)
         if (isChunked) {
 
-            nbrChunks =
-                (CHUNK_SEPARATOR.length == 0 ? 0 : (int) Math.ceil((float) encodedDataLength / CHUNK_SIZE));
-            encodedDataLength += nbrChunks * CHUNK_SEPARATOR.length;
+            chunckCount = (CHUNK_SEPARATOR.length == 0 ? 0 : (int) Math
+                    .ceil((float) encodedDataLengthLong / CHUNK_SIZE));
+            encodedDataLengthLong += chunckCount * CHUNK_SEPARATOR.length;
         }
 
-        encodedData = new byte[encodedDataLength];
+        if (encodedDataLengthLong > Integer.MAX_VALUE) {
+            throw new IllegalArgumentException(
+                    "Input array too big, output array would be bigger than Integer.MAX_VALUE=" + Integer.MAX_VALUE);
+        }
+        int encodedDataLength = (int) encodedDataLengthLong;
+        byte encodedData[] = new byte[encodedDataLength];
 
         byte k = 0, l = 0, b1 = 0, b2 = 0, b3 = 0;
 
@@ -294,33 +302,28 @@
         int nextSeparatorIndex = CHUNK_SIZE;
         int chunksSoFar = 0;
 
-        //log.debug("number of triplets = " + numberTriplets);
-        for (i = 0; i < numberTriplets; i++) {
+        // log.debug("number of triplets = " + tripletCount);
+        for (i = 0; i < tripletCount; i++) {
             dataIndex = i * 3;
             b1 = binaryData[dataIndex];
             b2 = binaryData[dataIndex + 1];
             b3 = binaryData[dataIndex + 2];
 
-            //log.debug("b1= " + b1 +", b2= " + b2 + ", b3= " + b3);
+            // log.debug("b1= " + b1 +", b2= " + b2 + ", b3= " + b3);
 
             l = (byte) (b2 & 0x0f);
             k = (byte) (b1 & 0x03);
 
-            byte val1 =
-                ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
-            byte val2 =
-                ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0);
-            byte val3 =
-                ((b3 & SIGN) == 0) ? (byte) (b3 >> 6) : (byte) ((b3) >> 6 ^ 0xfc);
+            byte val1 = ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
+            byte val2 = ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0);
+            byte val3 = ((b3 & SIGN) == 0) ? (byte) (b3 >> 6) : (byte) ((b3) >> 6 ^ 0xfc);
 
             encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
-            //log.debug( "val2 = " + val2 );
-            //log.debug( "k4   = " + (k<<4) );
-            //log.debug(  "vak  = " + (val2 | (k<<4)) );
-            encodedData[encodedIndex + 1] =
-                lookUpBase64Alphabet[val2 | (k << 4)];
-            encodedData[encodedIndex + 2] =
-                lookUpBase64Alphabet[(l << 2) | val3];
+            // log.debug( "val2 = " + val2 );
+            // log.debug( "k4 = " + (k<<4) );
+            // log.debug( "vak = " + (val2 | (k<<4)) );
+            encodedData[encodedIndex + 1] = lookUpBase64Alphabet[val2 | (k << 4)];
+            encodedData[encodedIndex + 2] = lookUpBase64Alphabet[(l << 2) | val3];
             encodedData[encodedIndex + 3] = lookUpBase64Alphabet[b3 & 0x3f];
 
             encodedIndex += 4;
@@ -329,16 +332,9 @@
             if (isChunked) {
                 // this assumes that CHUNK_SIZE % 4 == 0
                 if (encodedIndex == nextSeparatorIndex) {
-                    System.arraycopy(
-                        CHUNK_SEPARATOR,
-                        0,
-                        encodedData,
-                        encodedIndex,
-                        CHUNK_SEPARATOR.length);
+                    System.arraycopy(CHUNK_SEPARATOR, 0, encodedData, encodedIndex, CHUNK_SEPARATOR.length);
                     chunksSoFar++;
-                    nextSeparatorIndex =
-                        (CHUNK_SIZE * (chunksSoFar + 1)) + 
-                        (chunksSoFar * CHUNK_SEPARATOR.length);
+                    nextSeparatorIndex = (CHUNK_SIZE * (chunksSoFar + 1)) + (chunksSoFar * CHUNK_SEPARATOR.length);
                     encodedIndex += CHUNK_SEPARATOR.length;
                 }
             }
@@ -350,10 +346,9 @@
         if (fewerThan24bits == EIGHTBIT) {
             b1 = binaryData[dataIndex];
             k = (byte) (b1 & 0x03);
-            //log.debug("b1=" + b1);
-            //log.debug("b1<<2 = " + (b1>>2) );
-            byte val1 =
-                ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
+            // log.debug("b1=" + b1);
+            // log.debug("b1<<2 = " + (b1>>2) );
+            byte val1 = ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
             encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
             encodedData[encodedIndex + 1] = lookUpBase64Alphabet[k << 4];
             encodedData[encodedIndex + 2] = PAD;
@@ -365,27 +360,20 @@
             l = (byte) (b2 & 0x0f);
             k = (byte) (b1 & 0x03);
 
-            byte val1 =
-                ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
-            byte val2 =
-                ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0);
+            byte val1 = ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
+            byte val2 = ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0);
 
             encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
-            encodedData[encodedIndex + 1] =
-                lookUpBase64Alphabet[val2 | (k << 4)];
+            encodedData[encodedIndex + 1] = lookUpBase64Alphabet[val2 | (k << 4)];
             encodedData[encodedIndex + 2] = lookUpBase64Alphabet[l << 2];
             encodedData[encodedIndex + 3] = PAD;
         }
 
         if (isChunked) {
             // we also add a separator to the end of the final chunk.
-            if (chunksSoFar < nbrChunks) {
-                System.arraycopy(
-                    CHUNK_SEPARATOR,
-                    0,
-                    encodedData,
-                    encodedDataLength - CHUNK_SEPARATOR.length,
-                    CHUNK_SEPARATOR.length);
+            if (chunksSoFar < chunckCount) {
+                System.arraycopy(CHUNK_SEPARATOR, 0, encodedData, encodedDataLength - CHUNK_SEPARATOR.length,
+                        CHUNK_SEPARATOR.length);
             }
         }
 
@@ -394,8 +382,9 @@
 
     /**
      * Decodes Base64 data into octects
-     *
-     * @param base64Data Byte array containing Base64 data
+     * 
+     * @param base64Data
+     *            Byte array containing Base64 data
      * @return Array containing decoded data.
      */
     public static byte[] decodeBase64(byte[] base64Data) {
@@ -426,59 +415,57 @@
             }
             decodedData = new byte[lastData - numberQuadruple];
         }
-        
+
         for (int i = 0; i < numberQuadruple; i++) {
             dataIndex = i * 4;
             marker0 = base64Data[dataIndex + 2];
             marker1 = base64Data[dataIndex + 3];
-            
+
             b1 = base64Alphabet[base64Data[dataIndex]];
             b2 = base64Alphabet[base64Data[dataIndex + 1]];
-            
+
             if (marker0 != PAD && marker1 != PAD) {
-                //No PAD e.g 3cQl
+                // No PAD e.g 3cQl
                 b3 = base64Alphabet[marker0];
                 b4 = base64Alphabet[marker1];
-                
+
                 decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
-                decodedData[encodedIndex + 1] =
-                    (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
+                decodedData[encodedIndex + 1] = (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
                 decodedData[encodedIndex + 2] = (byte) (b3 << 6 | b4);
             } else if (marker0 == PAD) {
-                //Two PAD e.g. 3c[Pad][Pad]
+                // Two PAD e.g. 3c[Pad][Pad]
                 decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
             } else if (marker1 == PAD) {
-                //One PAD e.g. 3cQ[Pad]
+                // One PAD e.g. 3cQ[Pad]
                 b3 = base64Alphabet[marker0];
-                
+
                 decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
-                decodedData[encodedIndex + 1] =
-                    (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
+                decodedData[encodedIndex + 1] = (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
             }
             encodedIndex += 3;
         }
         return decodedData;
     }
-    
+
     /**
      * Discards any whitespace from a base-64 encoded block.
-     *
-     * @param data The base-64 encoded data to discard the whitespace
-     * from.
+     * 
+     * @param data
+     *            The base-64 encoded data to discard the whitespace from.
      * @return The data, less whitespace (see RFC 2045).
      */
     static byte[] discardWhitespace(byte[] data) {
         byte groomedData[] = new byte[data.length];
         int bytesCopied = 0;
-        
+
         for (int i = 0; i < data.length; i++) {
             switch (data[i]) {
-            case (byte) ' ' :
-            case (byte) '\n' :
-            case (byte) '\r' :
-            case (byte) '\t' :
+                case (byte) ' ' :
+                case (byte) '\n' :
+                case (byte) '\r' :
+                case (byte) '\t' :
                     break;
-            default:
+                default :
                     groomedData[bytesCopied++] = data[i];
             }
         }
@@ -491,12 +478,11 @@
     }
 
     /**
-     * Discards any characters outside of the base64 alphabet, per
-     * the requirements on page 25 of RFC 2045 - "Any characters
-     * outside of the base64 alphabet are to be ignored in base64
-     * encoded data."
-     *
-     * @param data The base-64 encoded data to groom
+     * Discards any characters outside of the base64 alphabet, per the requirements on page 25 of RFC 2045 - "Any
+     * characters outside of the base64 alphabet are to be ignored in base64 encoded data."
+     * 
+     * @param data
+     *            The base-64 encoded data to groom
      * @return The data, less non-base64 characters (see RFC 2045).
      */
     static byte[] discardNonBase64(byte[] data) {
@@ -516,34 +502,30 @@
         return packedData;
     }
 
-
     // Implementation of the Encoder Interface
 
     /**
-     * Encodes an Object using the base64 algorithm.  This method
-     * is provided in order to satisfy the requirements of the
-     * Encoder interface, and will throw an EncoderException if the
-     * supplied object is not of type byte[].
-     *
-     * @param pObject Object to encode
-     * @return An object (of type byte[]) containing the 
-     *         base64 encoded data which corresponds to the byte[] supplied.
-     * @throws EncoderException if the parameter supplied is not
-     *                          of type byte[]
+     * Encodes an Object using the base64 algorithm. This method is provided in order to satisfy the requirements of the
+     * Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[].
+     * 
+     * @param pObject
+     *            Object to encode
+     * @return An object (of type byte[]) containing the base64 encoded data which corresponds to the byte[] supplied.
+     * @throws EncoderException
+     *             if the parameter supplied is not of type byte[]
      */
     public Object encode(Object pObject) throws EncoderException {
         if (!(pObject instanceof byte[])) {
-            throw new EncoderException(
-                "Parameter supplied to Base64 encode is not a byte[]");
+            throw new EncoderException("Parameter supplied to Base64 encode is not a byte[]");
         }
         return encode((byte[]) pObject);
     }
 
     /**
-     * Encodes a byte[] containing binary data, into a byte[] containing
-     * characters in the Base64 alphabet.
-     *
-     * @param pArray a byte array containing binary data
+     * Encodes a byte[] containing binary data, into a byte[] containing characters in the Base64 alphabet.
+     * 
+     * @param pArray
+     *            a byte array containing binary data
      * @return A byte array containing only Base64 character data
      */
     public byte[] encode(byte[] pArray) {