You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ju...@apache.org on 2013/02/18 20:14:31 UTC
svn commit: r1447443 - in /commons/proper/codec/trunk/src: changes/
main/java/org/apache/commons/codec/binary/
Author: julius
Date: Mon Feb 18 19:14:31 2013
New Revision: 1447443
URL: http://svn.apache.org/r1447443
Log:
CODEC-166 - Base64 could be faster.
Added:
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/ApacheModifiedMiGBase64.java
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/MiGBase64.original
Modified:
commons/proper/codec/trunk/src/changes/changes.xml
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/Base64.java
Modified: commons/proper/codec/trunk/src/changes/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/changes/changes.xml?rev=1447443&r1=1447442&r2=1447443&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/changes/changes.xml (original)
+++ commons/proper/codec/trunk/src/changes/changes.xml Mon Feb 18 19:14:31 2013
@@ -48,6 +48,7 @@ The <action> type attribute can be add,u
</release>
-->
<release version="1.8" date="TBA" description="Feature and fix release.">
+ <action dev="julius" type="fix" issue="CODEC-166">Base64 could be faster.</action>
<action dev="julius" type="update" issue="CODEC-167">Adds JUnit to test our decode with pad character in the middle.</action>
<action dev="ggregory" type="add" issue="CODEC-161" due-to="crice">Add Match Rating Approach (MRA) phonetic algorithm encoder.</action>
<action dev="ggregory" type="fix" issue="CODEC-163" due-to="leo141">ColognePhonetic encoder unneccessarily creates many char arrays on every loop run.</action>
Added: commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/ApacheModifiedMiGBase64.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/ApacheModifiedMiGBase64.java?rev=1447443&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/ApacheModifiedMiGBase64.java (added)
+++ commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/ApacheModifiedMiGBase64.java Mon Feb 18 19:14:31 2013
@@ -0,0 +1,550 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *
+ * NOTE ABOUT PROVENANCE:
+ * ----------------------
+ * This source file is called ApacheModifiedMiGBase64.java.
+ * We took the BSD-licensed MiGBase64.java file from SourceForge
+ * on January 28th, 2013 (http://migbase64.sourceforge.net/), and
+ * modified it to make it suitable for inclusion inside Apache
+ * Commons-Codec.
+ *
+ * The original file is licensed according to the BSD 2-clause
+ * license (see below, after the section titled "Licence (BSD)").
+ * You should also be able to obtain the original file as
+ * "MiGBase64.original" within the same source directory as this file.
+ *
+ */
+
+package org.apache.commons.codec.binary;
+
+import java.util.Arrays;
+
+/**
+ * <pre>====================================================</pre>
+ * Modified by Apache Software Foundation on February 18th, 2013, in the following ways:
+ * <p/>
+ * - Set all methods to "package" level visibility, since this is strictly
+ * meant to be back-end for our non-streaming Base64 implementation.
+ * (Streaming Base64 still uses our original implementation).
+ * <p/>
+ * - Added support for the Apache Commons Codec variations to make all the Commons-Codec
+ * unit tests pass:
+ * <ol>
+ * <li> Ability to alter line-length from default of 76</li>
+ * <li> If we are using line-separators, must always end with a line-separator, no matter
+ * length of final line.</li>
+ * <li> Make '=' and '==' padding optional when decoding.</li>
+ * <li> Make decoding of Base64 with inner padding (e.g., AA==AA==) consistent with Commons-Codec.</li>
+ * <li> Add support for URL-Safe Base64 alphabet (which, incidentally, omits '=' and '==' padding).</li>
+ * </ol>
+ * <p/>
+ * - And thus Apache Commons-Codec is now as fast as MiGBase64, since it uses MiGBase64 under the
+ * hood. Yay! (Non-streaming encode speed-up is around 200%).
+ * <p/>
+ * And now, back to your regular scheduled programming:
+ * <pre>====================================================</pre>
+ * <p/>
+ * A very fast and memory efficient class to encode and decode to and from BASE64 in full accordance
+ * with RFC 2045.<br><br>
+ * On Windows XP sp1 with 1.4.2_04 and later ;), this encoder and decoder is about 10 times faster
+ * on small arrays (10 - 1000 bytes) and 2-3 times as fast on larger arrays (10000 - 1000000 bytes)
+ * compared to <code>sun.misc.Encoder()/Decoder()</code>.<br><br>
+ * <p/>
+ * On byte arrays the encoder is about 20% faster than Jakarta Commons Codec for encode and
+ * about 50% faster for decoding large arrays. This implementation is about twice as fast on very small
+ * arrays (< 30 bytes). If source/destination is a <code>String</code> this
+ * version is about three times as fast due to the fact that the Commons Codec result has to be recoded
+ * to a <code>String</code> from <code>byte[]</code>, which is very expensive.<br><br>
+ * <p/>
+ * This encode/decode algorithm doesn't create any temporary arrays as many other codecs do, it only
+ * allocates the resulting array. This produces less garbage and it is possible to handle arrays twice
+ * as large as algorithms that create a temporary array. (E.g. Jakarta Commons Codec). It is unknown
+ * whether Sun's <code>sun.misc.Encoder()/Decoder()</code> produce temporary arrays but since performance
+ * is quite low it probably does.<br><br>
+ * <p/>
+ * The encoder produces the same output as the Sun one except that the Sun's encoder appends
+ * a trailing line separator if the last character isn't a pad. Unclear why but it only adds to the
+ * length and is probably a side effect. Both are in conformance with RFC 2045 though.<br>
+ * Commons codec seems to always add a trailing line separator.<br><br>
+ * <p/>
+ * <b>Note!</b>
+ * The encode/decode method pairs (types) come in three versions with the <b>exact</b> same algorithm and
+ * thus a lot of code redundancy. This is to not create any temporary arrays for transcoding to/from different
+ * format types. The methods not used can simply be commented out.<br><br>
+ * <p/>
+ * <S>There is also a "fast" version of all decode methods that works the same way as the normal ones, but
+ * has a few demands on the decoded input. Normally though, these fast versions should be used if the source of
+ * the input is known and it hasn't been tampered with.</S> (- removed for ApacheModifiedMiGBase64). <br><br>
+ * <p/>
+ * If you find the code useful or you find a bug, please send me a note at base64 @ miginfocom . com.
+ * <p/>
+ * Licence (BSD):
+ * ==============
+ * <p/>
+ * Copyright (c) 2004, Mikael Grev, MiG InfoCom AB. (base64 @ miginfocom . com)
+ * All rights reserved.
+ * <p/>
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright notice, this list
+ * of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice, this
+ * list of conditions and the following disclaimer in the documentation and/or other
+ * materials provided with the distribution.
+ * Neither the name of the MiG InfoCom AB nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without specific
+ * prior written permission.
+ * <p/>
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+ * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE.
+ *
+ * @author Mikael Grev
+ * Date: 2004-aug-02
+ * Time: 11:31:11
+ * @version 2.2
+ */
+
+public final class ApacheModifiedMiGBase64 {
+ // Marked the class public so that it shows up in javadoc generation. All methods are static "package" level.
+
+ private final static byte[] CRLF = {'\r', '\n'};
+
+ private static final char[] CA =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();
+
+ private static final char[] CA_URL_SAFE =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_".toCharArray();
+
+ private static final int[] IA = new int[256];
+
+ /**
+ * Private constructor to prevent instantiation.
+ */
+ private ApacheModifiedMiGBase64() {}
+
+ static {
+ Arrays.fill(IA, -1);
+ for (int i = 0, iS = CA.length; i < iS; i++) {
+ IA[CA[i]] = i;
+
+ // Store the URL_SAFE values in the same IA array.
+ // This way we can auto-decode URL-SAFE or standard alphabet, without
+ // consumer needing to specify decode alphabet ahead of time:
+ IA[CA_URL_SAFE[i]] = i;
+ }
+ IA['='] = 0;
+ }
+
+ /**
+ * Decodes a BASE64 encoded char array. All illegal characters will be ignored and can handle both arrays with
+ * and without line separators.
+ *
+ * @param sArr The source array.
+ * @return The decoded array of bytes. May be of length 0.
+ */
+ static byte[] decode(final char[] sArr) {
+ final int sLen = sArr != null ? sArr.length : 0;
+ if (sLen == 0) {
+ return new byte[0];
+ }
+
+ // Find earliest pad character so that we can decode things like "AA==AA==" consistently.
+ int padPos = sLen;
+ int padCount = 0;
+ for (int i = sLen - 1; i >= 0; i--) {
+ if (sArr[i] == '=') {
+ padPos = i;
+ padCount = 1;
+ if (i + 1 < sLen && sArr[i + 1] == '=') {
+ padCount = 2;
+ }
+ }
+ }
+
+ // Count illegal characters (including '\r', '\n') to know what size the returned array will be,
+ // so we don't have to reallocate & copy it later.
+ int sepCnt = 0; // Number of separator characters. (Actually illegal characters, but that's a bonus...)
+ for (int i = 0; i < padPos; i++) {
+ if (IA[sArr[i]] < 0) {
+ sepCnt++;
+ }
+ }
+
+ final int len = ((padPos + padCount - sepCnt) * 6 >> 3) - padCount;
+ if (len <= 0) {
+ return new byte[0];
+ }
+ final byte[] dArr = new byte[len]; // Preallocate byte[] of exact length
+ int d = 0;
+ int i = 0;
+ try {
+ for (int s = 0; d < len; ) {
+ // Assemble three bytes into an int from four "valid" characters.
+ i = 0;
+ for (int j = 0; j < 4; j++) { // j only increased if a valid char was found.
+ int c = IA[sArr[s++]];
+ if (c >= 0) {
+ i |= c << (18 - j * 6);
+ } else {
+ j--;
+ }
+ }
+ // Add the bytes
+ dArr[d++] = (byte) (i >> 16);
+ if (d < len) {
+ dArr[d++] = (byte) (i >> 8);
+ if (d < len) {
+ dArr[d++] = (byte) i;
+ }
+ }
+ }
+ } catch (ArrayIndexOutOfBoundsException aioobe) {
+ // Handle url-safe input (with no padding).
+ dArr[d++] = (byte) (i >> 16);
+ if (d < len) {
+ dArr[d++] = (byte) (i >> 8);
+ if (d < len) {
+ dArr[d] = (byte) i;
+ }
+ }
+ }
+ return dArr;
+ }
+
+ /**
+ * Decodes a BASE64 encoded byte array. All illegal characters will be ignored and can handle both arrays with
+ * and without line separators.
+ *
+ * @param sArr The source array.
+ * @return The decoded array of bytes. May be of length 0.
+ */
+ static byte[] decode(final byte[] sArr) {
+ final int sLen = sArr != null ? sArr.length : 0;
+ if (sLen == 0) {
+ return new byte[0];
+ }
+
+ // Find earliest pad character so that we can decode things like "AA==AA==" consistently.
+ int padPos = sLen;
+ int padCount = 0;
+ for (int i = sLen - 1; i >= 0; i--) {
+ if (sArr[i] == '=') {
+ padPos = i;
+ padCount = 1;
+ if (i + 1 < sLen && sArr[i + 1] == '=') {
+ padCount = 2;
+ }
+ }
+ }
+
+ // Count illegal characters (including '\r', '\n') to know what size the returned array will be,
+ // so we don't have to reallocate & copy it later.
+ int sepCnt = 0; // Number of separator characters. (Actually illegal characters, but that's a bonus...)
+ for (int i = 0; i < padPos; i++) {
+ if (IA[sArr[i] & 0xff] < 0) {
+ sepCnt++;
+ }
+ }
+
+ final int len = ((padPos + padCount - sepCnt) * 6 >> 3) - padCount;
+ if (len <= 0) {
+ return new byte[0];
+ }
+ final byte[] dArr = new byte[len]; // Preallocate byte[] of exact length
+ int d = 0;
+ int i = 0;
+ try {
+ for (int s = 0; d < len; ) {
+ // Assemble three bytes into an int from four "valid" characters.
+ i = 0;
+ for (int j = 0; j < 4; j++) { // j only increased if a valid char was found.
+ int c = IA[sArr[s++] & 0xff];
+ if (c >= 0) {
+ i |= c << (18 - j * 6);
+ } else {
+ j--;
+ }
+ }
+
+ // Add the bytes
+ dArr[d++] = (byte) (i >> 16);
+ if (d < len) {
+ dArr[d++] = (byte) (i >> 8);
+ if (d < len) {
+ dArr[d++] = (byte) i;
+ }
+ }
+ }
+ } catch (ArrayIndexOutOfBoundsException aioobe) {
+ // Handle url-safe input (with no padding).
+ dArr[d++] = (byte) (i >> 16);
+ if (d < len) {
+ dArr[d++] = (byte) (i >> 8);
+ if (d < len) {
+ dArr[d] = (byte) i;
+ }
+ }
+ }
+ return dArr;
+ }
+
+ /**
+ * Encodes a raw byte array into a BASE64 <code>byte[]</code> representation in accordance with RFC 2045.
+ *
+ * @param sArr The bytes to convert.
+ * @param lineSep If true, "\r\n" is inserted after every 76 characters and also appended at the end of the output.<br>
+ * No line separator will be in breach of RFC 2045 which specifies max 76 per line but will be a
+ * little faster.
+ * @param urlSafe If true, use the URL_SAFE base64 alphabet (-_) instead of the standard alphabet (+/).
+ * @param maxResultSize Largest size of result we are willing to encode (typically Integer.MAX_VALUE).
+ * @return A BASE64 encoded array.
+ */
+ static byte[] encodeToByte(byte[] sArr, final boolean lineSep, boolean urlSafe, int maxResultSize) {
+ return encodeToByte(sArr, lineSep, urlSafe, maxResultSize, CRLF, 76);
+ }
+
+ /**
+ * Encodes a raw byte array into a BASE64 <code>byte[]</code> representation in accordance with RFC 2045.
+ *
+ * @param sArr The bytes to convert.
+ * @param lineSep Optional "\r\n" after 76 characters, unless end of file.<br>
+ * No line separator will be in breach of RFC 2045 which specifies max 76 per line but will be a
+ * little faster.
+ * @param urlSafe If true, use the URL_SAFE base64 alphabet (-_) instead of the standard alphabet (+/).
+ * @param maxResultSize Largest size of result we are willing to encode (typically Integer.MAX_VALUE).
+ * @param lineSeparator Sequence of bytes to use as the line separator (typically {'\r','\n'}). Ignored
+ * if <code>lineSep</code> is set to false.
+ * @param lineLen Number of characters to write out per line before writing the lineSeparator
+ * sequence. Ignored if <code>lineSep</code> is set to false.
+ * @return A BASE64 encoded array.
+ */
+ static byte[] encodeToByte(
+ final byte[] sArr, final boolean lineSep, final boolean urlSafe, final int maxResultSize,
+ final byte[] lineSeparator, final int lineLen
+ ) {
+ if (sArr == null || sArr.length == 0) { return sArr; }
+
+ final int sLen = sArr.length;
+ final int eLen = (sLen / 3) * 3; // Length of even 24-bits.
+ final int left = sLen - eLen; // A value between 0 and 2.
+ final int cCnt = ((sLen - 1) / 3 + 1) << 2; // Returned character count
+ int dLen = cCnt + (lineSep ? (cCnt - 1) / lineLen * lineSeparator.length : 0); // Length of returned array
+
+ // org.apache.commons.codec.binary.Base64 always ends with a line separator in chunking mode.
+ if (lineSep) {
+ dLen += lineSeparator.length;
+ }
+
+ final char[] ENCODE_ARRAY = urlSafe ? ApacheModifiedMiGBase64.CA_URL_SAFE : ApacheModifiedMiGBase64.CA;
+ if (urlSafe && left > 0) {
+ dLen--;
+ if (left != 2) {
+ dLen--;
+ }
+ }
+ checkLen(dLen, maxResultSize);
+ final byte[] dArr = new byte[dLen];
+
+ // Encode even 24-bits
+ int charCount = 0;
+ for (int s = 0, d = 0; s < eLen; ) {
+ // Copy next three bytes into lower 24 bits of int, paying attention to sign.
+ int i = (sArr[s++] & 0xff) << 16 | (sArr[s++] & 0xff) << 8 | (sArr[s++] & 0xff);
+
+ // Encode the int into four chars
+ dArr[d++] = (byte) ENCODE_ARRAY[(i >>> 18) & 0x3f];
+ dArr[d++] = (byte) ENCODE_ARRAY[(i >>> 12) & 0x3f];
+ dArr[d++] = (byte) ENCODE_ARRAY[(i >>> 6) & 0x3f];
+ dArr[d++] = (byte) ENCODE_ARRAY[i & 0x3f];
+ charCount += 4;
+
+ // Add optional line separator
+ if (lineSep && charCount % lineLen <= 3 && d < dLen - lineSeparator.length) {
+ System.arraycopy(lineSeparator, 0, dArr, d, lineSeparator.length);
+ d += lineSeparator.length;
+ }
+ }
+
+ // Make space for our final CRLF.
+ if (lineSep) {
+ dLen -= lineSeparator.length;
+ }
+
+ // Pad and encode last bits if source isn't an even 24 bits.
+ if (left > 0) {
+ // Prepare the int
+ int i = ((sArr[eLen] & 0xff) << 10) | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0);
+
+ // Set last four chars
+ // (url-safe omits the '=' padding).
+ if (urlSafe && left == 2) {
+ dArr[dLen - 3] = (byte) ENCODE_ARRAY[i >> 12];
+ dArr[dLen - 2] = (byte) ENCODE_ARRAY[(i >>> 6) & 0x3f];
+ dArr[dLen - 1] = (byte) ENCODE_ARRAY[i & 0x3f];
+ } else if (urlSafe) {
+ dArr[dLen - 2] = (byte) ENCODE_ARRAY[i >> 12];
+ dArr[dLen - 1] = (byte) ENCODE_ARRAY[(i >>> 6) & 0x3f];
+ } else {
+ dArr[dLen - 4] = (byte) ENCODE_ARRAY[i >> 12];
+ dArr[dLen - 3] = (byte) ENCODE_ARRAY[(i >>> 6) & 0x3f];
+ dArr[dLen - 2] = (byte) (left == 2 ? ENCODE_ARRAY[i & 0x3f] : '=');
+ dArr[dLen - 1] = '=';
+ }
+ }
+
+ // And now we append our final CRLF if necessary.
+ if (lineSep) {
+ dLen += lineSeparator.length;
+ System.arraycopy(lineSeparator, 0, dArr, dLen - lineSeparator.length, lineSeparator.length);
+ }
+ return dArr;
+ }
+
+ /**
+ * Encodes a raw byte array into a BASE64 <code>char[]</code> representation in accordance with RFC 2045.
+ *
+ * @param sArr The bytes to convert.
+ * @param lineSep If true, "\r\n" is inserted after every 76 characters and also appended at the end of the output.<br>
+ * No line separator will be in breach of RFC 2045 which specifies max 76 per line but will be a
+ * little faster.
+ * @param urlSafe If true, use the URL_SAFE base64 alphabet (-_) instead of the standard alphabet (+/).
+ * @param maxResultSize Largest size of result we are willing to encode (typically Integer.MAX_VALUE).
+ * @return A BASE64 encoded array.
+ */
+ static char[] encodeToChar(
+ final byte[] sArr, final boolean lineSep, final boolean urlSafe, final int maxResultSize
+ ) {
+ if (sArr == null) { return null; }
+ if (sArr.length == 0) { return new char[0]; }
+
+ final int sLen = sArr.length;
+ final int eLen = (sLen / 3) * 3; // Length of even 24-bits.
+ final int left = sLen - eLen; // A value between 0 and 2.
+ final int cCnt = ((sLen - 1) / 3 + 1) << 2; // Returned character count
+ int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0); // Length of returned array
+
+ // org.apache.commons.codec.binary.Base64 always ends with CRLF in chunking mode.
+ if (lineSep) {
+ dLen += 2;
+ }
+
+ final char[] ENCODE_ARRAY = urlSafe ? ApacheModifiedMiGBase64.CA_URL_SAFE : ApacheModifiedMiGBase64.CA;
+ if (urlSafe && left > 0) {
+ dLen--;
+ if (left != 2) {
+ dLen--;
+ }
+ }
+ checkLen(dLen, maxResultSize);
+ final char[] dArr = new char[dLen];
+
+ // Encode even 24-bits
+ for (int s = 0, d = 0, cc = 0; s < eLen; ) {
+ // Copy next three bytes into lower 24 bits of int, paying attention to sign.
+ int i = (sArr[s++] & 0xff) << 16 | (sArr[s++] & 0xff) << 8 | (sArr[s++] & 0xff);
+
+ // Encode the int into four chars
+ dArr[d++] = ENCODE_ARRAY[(i >>> 18) & 0x3f];
+ dArr[d++] = ENCODE_ARRAY[(i >>> 12) & 0x3f];
+ dArr[d++] = ENCODE_ARRAY[(i >>> 6) & 0x3f];
+ dArr[d++] = ENCODE_ARRAY[i & 0x3f];
+
+ // Add optional line separator
+ if (lineSep && ++cc == 19 && d < dLen - 2) {
+ dArr[d++] = '\r';
+ dArr[d++] = '\n';
+ cc = 0;
+ }
+ }
+
+ // Make space for our final CRLF.
+ if (lineSep) {
+ dLen -= 2;
+ }
+
+ // Pad and encode last bits if source isn't even 24 bits.
+ if (left > 0) {
+ // Prepare the int
+ int i = ((sArr[eLen] & 0xff) << 10) | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0);
+
+ // Set last four chars
+ // (url-safe omits the '=' padding).
+ if (urlSafe && left == 2) {
+ dArr[dLen - 3] = ENCODE_ARRAY[i >> 12];
+ dArr[dLen - 2] = ENCODE_ARRAY[(i >>> 6) & 0x3f];
+ dArr[dLen - 1] = ENCODE_ARRAY[i & 0x3f];
+ } else if (urlSafe) {
+ dArr[dLen - 2] = ENCODE_ARRAY[i >> 12];
+ dArr[dLen - 1] = ENCODE_ARRAY[(i >>> 6) & 0x3f];
+ } else {
+ dArr[dLen - 4] = ENCODE_ARRAY[i >> 12];
+ dArr[dLen - 3] = ENCODE_ARRAY[(i >>> 6) & 0x3f];
+ dArr[dLen - 2] = left == 2 ? ENCODE_ARRAY[i & 0x3f] : '=';
+ dArr[dLen - 1] = '=';
+ }
+ }
+
+ // And now we append our final CRLF if necessary.
+ if (lineSep) {
+ dLen += 2;
+ dArr[dLen - 2] = '\r';
+ dArr[dLen - 1] = '\n';
+ }
+ return dArr;
+ }
+
+ /**
+ * Encodes a raw byte array into a BASE64 <code>String</code> representation in accordance with RFC 2045.
+ *
+ * @param sArr The bytes to convert.
+ * @param lineSep If true, "\r\n" is inserted after every 76 characters and also appended at the end of the output.<br>
+ * No line separator will be in breach of RFC 2045 which specifies max 76 per line but will be a
+ * little faster.
+ * @param urlSafe If true, use the URL_SAFE base64 alphabet (-_) instead of the standard alphabet (+/).
+ * @param maxResultSize Largest size of result we are willing to encode (typically Integer.MAX_VALUE).
+ * @return A BASE64 encoded array.
+ */
+ static String encodeToString(
+ byte[] sArr, boolean lineSep, boolean urlSafe, int maxResultSize
+ ) {
+ if (sArr == null) { return null; }
+ if (sArr.length == 0) { return ""; }
+
+ // Reuse char[] since we can't create a String incrementally anyway and StringBuffer/Builder would be slower.
+ return new String(encodeToChar(sArr, lineSep, urlSafe, maxResultSize));
+ }
+
+
+ private static void checkLen(int dLen, int maxResultSize) {
+ if (dLen > maxResultSize) {
+ throw new IllegalArgumentException("Input array too big, the output array would be bigger (" +
+ dLen +
+ ") than the specified maximum size of " +
+ maxResultSize);
+ }
+ }
+
+}
\ No newline at end of file
Modified: commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/Base64.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/Base64.java?rev=1447443&r1=1447442&r2=1447443&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/Base64.java (original)
+++ commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/Base64.java Mon Feb 18 19:14:31 2013
@@ -480,6 +480,25 @@ public class Base64 extends BaseNCodec {
}
}
+ @Override
+ public byte[] decode(final byte[] pArray) {
+ if (pArray == null || pArray.length == 0) {
+ return pArray;
+ }
+ return ApacheModifiedMiGBase64.decode(pArray);
+ }
+
+ @Override
+ public byte[] encode(final byte[] pArray) {
+ if (pArray == null || pArray.length == 0) {
+ return pArray;
+ }
+ return ApacheModifiedMiGBase64.encodeToByte(
+ pArray, lineSeparator != null, isUrlSafe(), Integer.MAX_VALUE, lineSeparator, lineLength
+ );
+ }
+
+
/**
* Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the
* method treats whitespace as valid.
@@ -563,7 +582,9 @@ public class Base64 extends BaseNCodec {
* @since 1.4 (NOTE: 1.4 chunked the output, whereas 1.5 does not).
*/
public static String encodeBase64String(final byte[] binaryData) {
- return StringUtils.newStringUtf8(encodeBase64(binaryData, false));
+ return ApacheModifiedMiGBase64.encodeToString(
+ binaryData, false, false, Integer.MAX_VALUE
+ );
}
/**
@@ -589,7 +610,9 @@ public class Base64 extends BaseNCodec {
* @since 1.4
*/
public static String encodeBase64URLSafeString(final byte[] binaryData) {
- return StringUtils.newStringUtf8(encodeBase64(binaryData, false, true));
+ return ApacheModifiedMiGBase64.encodeToString(
+ binaryData, false, true, Integer.MAX_VALUE
+ );
}
/**
@@ -656,24 +679,12 @@ public class Base64 extends BaseNCodec {
*/
public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked,
final boolean urlSafe, final int maxResultSize) {
- if (binaryData == null || binaryData.length == 0) {
- return binaryData;
- }
-
- // Create this so can use the super-class method
- // Also ensures that the same roundings are performed by the ctor and the code
- final Base64 b64 = isChunked ? new Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe);
- final long len = b64.getEncodedLength(binaryData);
- if (len > maxResultSize) {
- throw new IllegalArgumentException("Input array too big, the output array would be bigger (" +
- len +
- ") than the specified maximum size of " +
- maxResultSize);
- }
-
- return b64.encode(binaryData);
+ return ApacheModifiedMiGBase64.encodeToByte(
+ binaryData, isChunked, urlSafe, maxResultSize
+ );
}
+
/**
* Decodes a Base64 String into octets
*
@@ -683,7 +694,10 @@ public class Base64 extends BaseNCodec {
* @since 1.4
*/
public static byte[] decodeBase64(final String base64String) {
- return new Base64().decode(base64String);
+ if (base64String == null) { return null; }
+ if ("".equals(base64String)) { return new byte[0]; }
+
+ return ApacheModifiedMiGBase64.decode(base64String.toCharArray());
}
/**
@@ -694,7 +708,9 @@ public class Base64 extends BaseNCodec {
* @return Array containing decoded data.
*/
public static byte[] decodeBase64(final byte[] base64Data) {
- return new Base64().decode(base64Data);
+ if (base64Data == null || base64Data.length == 0) { return base64Data; }
+
+ return ApacheModifiedMiGBase64.decode(base64Data);
}
// Implementation of the Encoder Interface
Added: commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/MiGBase64.original
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/MiGBase64.original?rev=1447443&view=auto
==============================================================================
--- commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/MiGBase64.original (added)
+++ commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/binary/MiGBase64.original Mon Feb 18 19:14:31 2013
@@ -0,0 +1,590 @@
+
+// Okay, it's slightly different than true original MiGBase64.java:
+// - This comment added, and the code was reformatted using default
+// code formatting options in IntelliJ IDEA 10.5. (Jan 28th, 2013)
+
+package util;
+
+import java.util.Arrays;
+
+/**
+ * A very fast and memory efficient class to encode and decode to and from BASE64 in full accordance
+ * with RFC 2045.<br><br>
+ * On Windows XP sp1 with 1.4.2_04 and later ;), this encoder and decoder is about 10 times faster
+ * on small arrays (10 - 1000 bytes) and 2-3 times as fast on larger arrays (10000 - 1000000 bytes)
+ * compared to <code>sun.misc.Encoder()/Decoder()</code>.<br><br>
+ * <p/>
+ * On byte arrays the encoder is about 20% faster than Jakarta Commons Codec for encode and
+ * about 50% faster for decoding large arrays. This implementation is about twice as fast on very small
+ * arrays (< 30 bytes). If source/destination is a <code>String</code> this
+ * version is about three times as fast due to the fact that the Commons Codec result has to be recoded
+ * to a <code>String</code> from <code>byte[]</code>, which is very expensive.<br><br>
+ * <p/>
+ * This encode/decode algorithm doesn't create any temporary arrays as many other codecs do, it only
+ * allocates the resulting array. This produces less garbage and it is possible to handle arrays twice
+ * as large as algorithms that create a temporary array. (E.g. Jakarta Commons Codec). It is unknown
+ * whether Sun's <code>sun.misc.Encoder()/Decoder()</code> produce temporary arrays but since performance
+ * is quite low it probably does.<br><br>
+ * <p/>
+ * The encoder produces the same output as the Sun one except that the Sun's encoder appends
+ * a trailing line separator if the last character isn't a pad. Unclear why but it only adds to the
+ * length and is probably a side effect. Both are in conformance with RFC 2045 though.<br>
+ * Commons Codec seems to always add a trailing line separator.<br><br>
+ * <p/>
+ * <b>Note!</b>
+ * The encode/decode method pairs (types) come in three versions with the <b>exact</b> same algorithm and
+ * thus a lot of code redundancy. This is to not create any temporary arrays for transcoding to/from different
+ * format types. The methods not used can simply be commented out.<br><br>
+ * <p/>
+ * There is also a "fast" version of all decode methods that works the same way as the normal ones, but
+ * has a few demands on the decoded input. Normally though, these fast versions should be used if the source of
+ * the input is known and it hasn't been tampered with.<br><br>
+ * <p/>
+ * If you find the code useful or you find a bug, please send me a note at base64 @ miginfocom . com.
+ * <p/>
+ * Licence (BSD):
+ * ==============
+ * <p/>
+ * Copyright (c) 2004, Mikael Grev, MiG InfoCom AB. (base64 @ miginfocom . com)
+ * All rights reserved.
+ * <p/>
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright notice, this list
+ * of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice, this
+ * list of conditions and the following disclaimer in the documentation and/or other
+ * materials provided with the distribution.
+ * Neither the name of the MiG InfoCom AB nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without specific
+ * prior written permission.
+ * <p/>
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+ * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+ * OF SUCH DAMAGE.
+ *
+ * @author Mikael Grev
+ * Date: 2004-aug-02
+ * Time: 11:31:11
+ * @version 2.2
+ */
+
+public class MiGBase64 {
+ private static final char[] CA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray(); // Encode table: 6-bit value -> BASE64 character.
+ private static final int[] IA = new int[256]; // Decode table: character code (0-255) -> 6-bit value, or -1 if not a BASE64 character.
+
+ static {
+ Arrays.fill(IA, -1); // Start with every character code marked as illegal.
+ for (int i = 0, iS = CA.length; i < iS; i++)
+ IA[CA[i]] = i; // Inverse mapping of CA.
+ IA['='] = 0; // The pad character counts as legal and contributes zero bits.
+ }
+
+ // ****************************************************************************************
+ // * char[] version
+ // ****************************************************************************************
+
+ /**
+ * Encodes a raw byte array into a BASE64 <code>char[]</code> representation in accordance with RFC 2045.
+ *
+ * @param sArr The bytes to convert. If <code>null</code> or length 0 an empty array will be returned.
+ * @param lineSep Optional "\r\n" after 76 characters, unless end of file.<br>
+ * No line separator will be in breach of RFC 2045 which specifies max 76 per line but will be a
+ * little faster.
+ * @return A BASE64 encoded array. Never <code>null</code>.
+ */
+ public final static char[] encodeToChar(byte[] sArr, boolean lineSep) {
+ // Check special case: null and empty input both yield an empty array.
+ int sLen = sArr != null ? sArr.length : 0;
+ if (sLen == 0)
+ return new char[0];
+
+ int eLen = (sLen / 3) * 3; // Number of source bytes that form complete 3-byte (24-bit) groups.
+ int cCnt = ((sLen - 1) / 3 + 1) << 2; // Returned character count excluding separators: 4 per started group.
+ int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0); // Length of returned array: adds 2 chars per line break.
+ char[] dArr = new char[dLen];
+
+ // Encode even 24-bits
+ for (int s = 0, d = 0, cc = 0; s < eLen; ) {
+ // Copy next three bytes into lower 24 bits of int, paying attention to sign.
+ int i = (sArr[s++] & 0xff) << 16 | (sArr[s++] & 0xff) << 8 | (sArr[s++] & 0xff);
+
+ // Encode the int into four chars
+ dArr[d++] = CA[(i >>> 18) & 0x3f];
+ dArr[d++] = CA[(i >>> 12) & 0x3f];
+ dArr[d++] = CA[(i >>> 6) & 0x3f];
+ dArr[d++] = CA[i & 0x3f];
+
+ // Add optional line separator after every 19 groups (19 * 4 = 76 chars); the d < dLen - 2 guard keeps it from being written last.
+ if (lineSep && ++cc == 19 && d < dLen - 2) {
+ dArr[d++] = '\r';
+ dArr[d++] = '\n';
+ cc = 0;
+ }
+ }
+
+ // Pad and encode last bits if source isn't even 24 bits.
+ int left = sLen - eLen; // 0 - 2.
+ if (left > 0) {
+ // Prepare the int: the remaining one or two bytes, left-aligned in an 18-bit value (three 6-bit groups).
+ int i = ((sArr[eLen] & 0xff) << 10) | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0);
+
+ // Set last four chars, writing '=' where there are no source bits.
+ dArr[dLen - 4] = CA[i >> 12];
+ dArr[dLen - 3] = CA[(i >>> 6) & 0x3f];
+ dArr[dLen - 2] = left == 2 ? CA[i & 0x3f] : '=';
+ dArr[dLen - 1] = '=';
+ }
+ return dArr;
+ }
+
+ /**
+ * Decodes a BASE64 encoded char array. All illegal characters will be ignored and can handle both arrays with
+ * and without line separators. Characters above U+00FF have no decode-table entry and count as illegal too.
+ *
+ * @param sArr The source array. <code>null</code> or length 0 will return an empty array.
+ * @return The decoded array of bytes. May be of length 0. Will be <code>null</code> if the legal characters
+ * (including '=') isn't divideable by 4. (I.e. definitely corrupted).
+ */
+ public final static byte[] decode(char[] sArr) {
+ // Check special case
+ int sLen = sArr != null ? sArr.length : 0;
+ if (sLen == 0)
+ return new byte[0];
+
+ // Count illegal characters (including '\r', '\n') to know what size the returned array will be,
+ // so we don't have to reallocate & copy it later.
+ int sepCnt = 0; // Number of separator characters. (Actually illegal characters, but that's a bonus...)
+ for (int i = 0; i < sLen; i++) // A char >= IA.length cannot index IA, so it is range-checked first and counted as illegal.
+ if (sArr[i] >= IA.length || IA[sArr[i]] < 0)
+ sepCnt++;
+
+ // Check so that legal chars (including '=') are evenly divideable by 4 as specified in RFC 2045.
+ if ((sLen - sepCnt) % 4 != 0)
+ return null;
+
+ int pad = 0; // Count '=' at end, stepping over illegal chars on the way.
+ for (int i = sLen; i > 1 && (sArr[--i] >= IA.length || IA[sArr[i]] <= 0); )
+ if (sArr[i] == '=')
+ pad++;
+
+ int len = ((sLen - sepCnt) * 6 >> 3) - pad;
+
+ byte[] dArr = new byte[len]; // Preallocate byte[] of exact length
+
+ for (int s = 0, d = 0; d < len; ) {
+ // Assemble three bytes into an int from four "valid" characters.
+ int i = 0;
+ for (int j = 0; j < 4; j++, s++) { // j only increased if a valid char was found; s always advances.
+ int c = sArr[s] < IA.length ? IA[sArr[s]] : -1;
+ if (c >= 0)
+ i |= c << (18 - j * 6);
+ else
+ j--;
+ }
+ // Add the bytes
+ dArr[d++] = (byte) (i >> 16);
+ if (d < len) {
+ dArr[d++] = (byte) (i >> 8);
+ if (d < len)
+ dArr[d++] = (byte) i;
+ }
+ }
+ return dArr;
+ }
+
+ /**
+ * Decodes a BASE64 encoded char array that is known to be reasonably well formatted. The method is about twice as
+ * fast as {@link #decode(char[])}. The preconditions are:<br>
+ * + The array must have a line length of 76 chars OR no line separators at all (one line).<br>
+ * + Line separator must be "\r\n", as specified in RFC 2045<br>
+ * + The array must not contain illegal characters within the encoded string<br>
+ * + The array CAN have illegal characters at the beginning and end, those will be dealt with appropriately.<br>
+ *
+ * @param sArr The source array. Length 0 will return an empty array. <code>null</code> will throw an exception.
+ * @return The decoded array of bytes. May be of length 0.
+ */
+ public final static byte[] decodeFast(char[] sArr) {
+ // Check special case
+ int sLen = sArr.length;
+ if (sLen == 0)
+ return new byte[0];
+
+ int sIx = 0, eIx = sLen - 1; // Start and end index after trimming.
+
+ // Trim illegal chars from start (chars >= IA.length have no table entry and are illegal as well)
+ while (sIx < eIx && (sArr[sIx] >= IA.length || IA[sArr[sIx]] < 0))
+ sIx++;
+
+ // Trim illegal chars from end (same range check as above)
+ while (eIx > 0 && (sArr[eIx] >= IA.length || IA[sArr[eIx]] < 0))
+ eIx--;
+
+ // get the padding count (=) (0, 1 or 2)
+ int pad = sArr[eIx] == '=' ? (sArr[eIx - 1] == '=' ? 2 : 1) : 0; // Count '=' at end.
+ int cCnt = eIx - sIx + 1; // Content count including possible separators
+ int sepCnt = cCnt > 76 ? (sArr[sIx + 76] == '\r' ? cCnt / 78 : 0) << 1 : 0; // First line end is probed relative to the trimmed start.
+
+ int len = ((cCnt - sepCnt) * 6 >> 3) - pad; // The number of decoded bytes
+ byte[] dArr = new byte[len]; // Preallocate byte[] of exact length
+
+ // Decode all but the last 0 - 2 bytes.
+ int d = 0;
+ for (int cc = 0, eLen = (len / 3) * 3; d < eLen; ) {
+ // Assemble three bytes into an int from four "valid" characters.
+ int i = IA[sArr[sIx++]] << 18 | IA[sArr[sIx++]] << 12 | IA[sArr[sIx++]] << 6 | IA[sArr[sIx++]];
+
+ // Add the bytes
+ dArr[d++] = (byte) (i >> 16);
+ dArr[d++] = (byte) (i >> 8);
+ dArr[d++] = (byte) i;
+
+ // If line separator, jump over it.
+ if (sepCnt > 0 && ++cc == 19) {
+ sIx += 2;
+ cc = 0;
+ }
+ }
+
+ if (d < len) {
+ // Decode last 1-3 bytes (incl '=') into 1-3 bytes
+ int i = 0;
+ for (int j = 0; sIx <= eIx - pad; j++)
+ i |= IA[sArr[sIx++]] << (18 - j * 6);
+
+ for (int r = 16; d < len; r -= 8)
+ dArr[d++] = (byte) (i >> r);
+ }
+
+ return dArr;
+ }
+
+ // ****************************************************************************************
+ // * byte[] version
+ // ****************************************************************************************
+
+ /**
+ * Encodes a raw byte array into a BASE64 <code>byte[]</code> representation in accordance with RFC 2045.
+ *
+ * @param sArr The bytes to convert. If <code>null</code> or length 0 an empty array will be returned.
+ * @param lineSep Optional "\r\n" after 76 characters, unless end of file.<br>
+ * No line separator will be in breach of RFC 2045 which specifies max 76 per line but will be a
+ * little faster.
+ * @return A BASE64 encoded array. Never <code>null</code>.
+ */
+ public final static byte[] encodeToByte(byte[] sArr, boolean lineSep) {
+ // Check special case: null and empty input both yield an empty array.
+ int sLen = sArr != null ? sArr.length : 0;
+ if (sLen == 0)
+ return new byte[0];
+
+ int eLen = (sLen / 3) * 3; // Number of source bytes that form complete 3-byte (24-bit) groups.
+ int cCnt = ((sLen - 1) / 3 + 1) << 2; // Returned character count excluding separators: 4 per started group.
+ int dLen = cCnt + (lineSep ? (cCnt - 1) / 76 << 1 : 0); // Length of returned array: adds 2 bytes per line break.
+ byte[] dArr = new byte[dLen];
+
+ // Encode even 24-bits
+ for (int s = 0, d = 0, cc = 0; s < eLen; ) {
+ // Copy next three bytes into lower 24 bits of int, paying attention to sign.
+ int i = (sArr[s++] & 0xff) << 16 | (sArr[s++] & 0xff) << 8 | (sArr[s++] & 0xff);
+
+ // Encode the int into four chars
+ dArr[d++] = (byte) CA[(i >>> 18) & 0x3f];
+ dArr[d++] = (byte) CA[(i >>> 12) & 0x3f];
+ dArr[d++] = (byte) CA[(i >>> 6) & 0x3f];
+ dArr[d++] = (byte) CA[i & 0x3f];
+
+ // Add optional line separator after every 19 groups (19 * 4 = 76 chars); the d < dLen - 2 guard keeps it from being written last.
+ if (lineSep && ++cc == 19 && d < dLen - 2) {
+ dArr[d++] = '\r';
+ dArr[d++] = '\n';
+ cc = 0;
+ }
+ }
+
+ // Pad and encode last bits if source isn't an even 24 bits.
+ int left = sLen - eLen; // 0 - 2.
+ if (left > 0) {
+ // Prepare the int: the remaining one or two bytes, left-aligned in an 18-bit value (three 6-bit groups).
+ int i = ((sArr[eLen] & 0xff) << 10) | (left == 2 ? ((sArr[sLen - 1] & 0xff) << 2) : 0);
+
+ // Set last four chars, writing '=' where there are no source bits.
+ dArr[dLen - 4] = (byte) CA[i >> 12];
+ dArr[dLen - 3] = (byte) CA[(i >>> 6) & 0x3f];
+ dArr[dLen - 2] = left == 2 ? (byte) CA[i & 0x3f] : (byte) '=';
+ dArr[dLen - 1] = '=';
+ }
+ return dArr;
+ }
+
+ /**
+ * Decodes a BASE64 encoded byte array. All illegal characters will be ignored and can handle both arrays with
+ * and without line separators.
+ *
+ * @param sArr The source array. Length 0 will return an empty array. <code>null</code> will throw an exception.
+ * @return The decoded array of bytes. May be of length 0. Will be <code>null</code> if the legal characters
+ * (including '=') isn't divideable by 4. (I.e. definitely corrupted).
+ */
+ public final static byte[] decode(byte[] sArr) {
+ // No special-case shortcut here: null throws NullPointerException, length 0 falls through to an empty result.
+ int sLen = sArr.length;
+
+ // Count illegal characters (including '\r', '\n') to know what size the returned array will be,
+ // so we don't have to reallocate & copy it later.
+ int sepCnt = 0; // Number of separator characters. (Actually illegal characters, but that's a bonus...)
+ for (int i = 0; i < sLen; i++) // If input is "pure" (I.e. no line separators or illegal chars) base64 this loop can be commented out.
+ if (IA[sArr[i] & 0xff] < 0)
+ sepCnt++;
+
+ // Check so that legal chars (including '=') are evenly divideable by 4 as specified in RFC 2045.
+ if ((sLen - sepCnt) % 4 != 0)
+ return null;
+
+ int pad = 0; // Count '=' at end, stepping over illegal bytes on the way.
+ for (int i = sLen; i > 1 && IA[sArr[--i] & 0xff] <= 0; )
+ if (sArr[i] == '=')
+ pad++;
+
+ int len = ((sLen - sepCnt) * 6 >> 3) - pad;
+
+ byte[] dArr = new byte[len]; // Preallocate byte[] of exact length
+
+ for (int s = 0, d = 0; d < len; ) {
+ // Assemble three bytes into an int from four "valid" characters.
+ int i = 0;
+ for (int j = 0; j < 4; j++) { // j only increased if a valid char was found.
+ int c = IA[sArr[s++] & 0xff]; // Mask to 0-255 because Java bytes are signed.
+ if (c >= 0)
+ i |= c << (18 - j * 6);
+ else
+ j--;
+ }
+
+ // Add the bytes
+ dArr[d++] = (byte) (i >> 16);
+ if (d < len) {
+ dArr[d++] = (byte) (i >> 8);
+ if (d < len)
+ dArr[d++] = (byte) i;
+ }
+ }
+
+ return dArr;
+ }
+
+
+ /**
+ * Decodes a BASE64 encoded byte array that is known to be reasonably well formatted. The method is about twice as
+ * fast as {@link #decode(byte[])}. The preconditions are:<br>
+ * + The array must have a line length of 76 chars OR no line separators at all (one line).<br>
+ * + Line separator must be "\r\n", as specified in RFC 2045<br>
+ * + The array must not contain illegal characters within the encoded string<br>
+ * + The array CAN have illegal characters at the beginning and end, those will be dealt with appropriately.<br>
+ *
+ * @param sArr The source array. Length 0 will return an empty array. <code>null</code> will throw an exception.
+ * @return The decoded array of bytes. May be of length 0.
+ */
+ public final static byte[] decodeFast(byte[] sArr) {
+ // Check special case
+ int sLen = sArr.length;
+ if (sLen == 0)
+ return new byte[0];
+
+ int sIx = 0, eIx = sLen - 1; // Start and end index after trimming.
+
+ // Trim illegal chars from start
+ while (sIx < eIx && IA[sArr[sIx] & 0xff] < 0)
+ sIx++;
+
+ // Trim illegal chars from end
+ while (eIx > 0 && IA[sArr[eIx] & 0xff] < 0)
+ eIx--;
+
+ // get the padding count (=) (0, 1 or 2)
+ int pad = sArr[eIx] == '=' ? (sArr[eIx - 1] == '=' ? 2 : 1) : 0; // Count '=' at end.
+ int cCnt = eIx - sIx + 1; // Content count including possible separators
+ int sepCnt = cCnt > 76 ? (sArr[sIx + 76] == '\r' ? cCnt / 78 : 0) << 1 : 0; // First line end is probed relative to the trimmed start.
+
+ int len = ((cCnt - sepCnt) * 6 >> 3) - pad; // The number of decoded bytes
+ byte[] dArr = new byte[len]; // Preallocate byte[] of exact length
+
+ // Decode all but the last 0 - 2 bytes.
+ int d = 0;
+ for (int cc = 0, eLen = (len / 3) * 3; d < eLen; ) {
+ // Assemble three bytes into an int from four "valid" characters. Bytes index IA unmasked here, so a negative byte throws ArrayIndexOutOfBoundsException.
+ int i = IA[sArr[sIx++]] << 18 | IA[sArr[sIx++]] << 12 | IA[sArr[sIx++]] << 6 | IA[sArr[sIx++]];
+
+ // Add the bytes
+ dArr[d++] = (byte) (i >> 16);
+ dArr[d++] = (byte) (i >> 8);
+ dArr[d++] = (byte) i;
+
+ // If line separator, jump over it.
+ if (sepCnt > 0 && ++cc == 19) {
+ sIx += 2;
+ cc = 0;
+ }
+ }
+
+ if (d < len) {
+ // Decode last 1-3 bytes (incl '=') into 1-3 bytes
+ int i = 0;
+ for (int j = 0; sIx <= eIx - pad; j++)
+ i |= IA[sArr[sIx++]] << (18 - j * 6);
+
+ for (int r = 16; d < len; r -= 8)
+ dArr[d++] = (byte) (i >> r);
+ }
+
+ return dArr;
+ }
+
+ // ****************************************************************************************
+ // * String version
+ // ****************************************************************************************
+
+ /**
+ * Encodes a raw byte array into a BASE64 <code>String</code> representation in accordance with RFC 2045.
+ *
+ * @param sArr The bytes to convert. If <code>null</code> or length 0 an empty string will be returned.
+ * @param lineSep Optional "\r\n" after 76 characters, unless end of file.<br>
+ * No line separator will be in breach of RFC 2045 which specifies max 76 per line but will be a
+ * little faster.
+ * @return A BASE64 encoded string. Never <code>null</code>.
+ */
+ public final static String encodeToString(byte[] sArr, boolean lineSep) {
+ // Reuse char[] since we can't create a String incrementally anyway and StringBuffer/Builder would be slower.
+ return new String(encodeToChar(sArr, lineSep));
+ }
+
+ /**
+ * Decodes a BASE64 encoded <code>String</code>. All illegal characters will be ignored and can handle both strings with
+ * and without line separators. Characters above U+00FF have no decode-table entry and count as illegal too.<br>
+ * <b>Note!</b> It can be up to about 2x the speed to call <code>decode(str.toCharArray())</code> instead. That
+ * will create a temporary array though. This version will use <code>str.charAt(i)</code> to iterate the string.
+ *
+ * @param str The source string. <code>null</code> or length 0 will return an empty array.
+ * @return The decoded array of bytes. May be of length 0. Will be <code>null</code> if the legal characters
+ * (including '=') isn't divideable by 4. (I.e. definitely corrupted).
+ */
+ public final static byte[] decode(String str) {
+ // Check special case
+ int sLen = str != null ? str.length() : 0;
+ if (sLen == 0)
+ return new byte[0];
+
+ // Count illegal characters (including '\r', '\n') to know what size the returned array will be,
+ // so we don't have to reallocate & copy it later.
+ int sepCnt = 0; // Number of separator characters. (Actually illegal characters, but that's a bonus...)
+ for (int i = 0; i < sLen; i++) // A char >= IA.length cannot index IA, so it is range-checked first and counted as illegal.
+ if (str.charAt(i) >= IA.length || IA[str.charAt(i)] < 0)
+ sepCnt++;
+
+ // Check so that legal chars (including '=') are evenly divideable by 4 as specified in RFC 2045.
+ if ((sLen - sepCnt) % 4 != 0)
+ return null;
+
+ // Count '=' at end
+ int pad = 0;
+ for (int i = sLen; i > 1 && (str.charAt(--i) >= IA.length || IA[str.charAt(i)] <= 0); )
+ if (str.charAt(i) == '=')
+ pad++;
+
+ int len = ((sLen - sepCnt) * 6 >> 3) - pad;
+
+ byte[] dArr = new byte[len]; // Preallocate byte[] of exact length
+
+ for (int s = 0, d = 0; d < len; ) {
+ // Assemble three bytes into an int from four "valid" characters.
+ int i = 0;
+ for (int j = 0; j < 4; j++, s++) { // j only increased if a valid char was found; s always advances.
+ int c = str.charAt(s) < IA.length ? IA[str.charAt(s)] : -1;
+ if (c >= 0)
+ i |= c << (18 - j * 6);
+ else
+ j--;
+ }
+ // Add the bytes
+ dArr[d++] = (byte) (i >> 16);
+ if (d < len) {
+ dArr[d++] = (byte) (i >> 8);
+ if (d < len)
+ dArr[d++] = (byte) i;
+ }
+ }
+ return dArr;
+ }
+
+ /**
+ * Decodes a BASE64 encoded string that is known to be reasonably well formatted. The method is about twice as
+ * fast as {@link #decode(String)}. The preconditions are:<br>
+ * + The string must have a line length of 76 chars OR no line separators at all (one line).<br>
+ * + Line separator must be "\r\n", as specified in RFC 2045<br>
+ * + The string must not contain illegal characters within the encoded string<br>
+ * + The string CAN have illegal characters at the beginning and end, those will be dealt with appropriately.<br>
+ *
+ * @param s The source string. Length 0 will return an empty array. <code>null</code> will throw an exception.
+ * @return The decoded array of bytes. May be of length 0.
+ */
+ public final static byte[] decodeFast(String s) {
+ // Check special case
+ int sLen = s.length();
+ if (sLen == 0)
+ return new byte[0];
+
+ int sIx = 0, eIx = sLen - 1; // Start and end index after trimming.
+
+ // Trim illegal chars from start (range check instead of a 0xff mask, which would alias high chars onto legal ones)
+ while (sIx < eIx && (s.charAt(sIx) >= IA.length || IA[s.charAt(sIx)] < 0))
+ sIx++;
+
+ // Trim illegal chars from end (same range check as above)
+ while (eIx > 0 && (s.charAt(eIx) >= IA.length || IA[s.charAt(eIx)] < 0))
+ eIx--;
+
+ // get the padding count (=) (0, 1 or 2)
+ int pad = s.charAt(eIx) == '=' ? (s.charAt(eIx - 1) == '=' ? 2 : 1) : 0; // Count '=' at end.
+ int cCnt = eIx - sIx + 1; // Content count including possible separators
+ int sepCnt = cCnt > 76 ? (s.charAt(sIx + 76) == '\r' ? cCnt / 78 : 0) << 1 : 0; // First line end is probed relative to the trimmed start.
+
+ int len = ((cCnt - sepCnt) * 6 >> 3) - pad; // The number of decoded bytes
+ byte[] dArr = new byte[len]; // Preallocate byte[] of exact length
+
+ // Decode all but the last 0 - 2 bytes.
+ int d = 0;
+ for (int cc = 0, eLen = (len / 3) * 3; d < eLen; ) {
+ // Assemble three bytes into an int from four "valid" characters.
+ int i = IA[s.charAt(sIx++)] << 18 | IA[s.charAt(sIx++)] << 12 | IA[s.charAt(sIx++)] << 6 | IA[s.charAt(sIx++)];
+
+ // Add the bytes
+ dArr[d++] = (byte) (i >> 16);
+ dArr[d++] = (byte) (i >> 8);
+ dArr[d++] = (byte) i;
+
+ // If line separator, jump over it.
+ if (sepCnt > 0 && ++cc == 19) {
+ sIx += 2;
+ cc = 0;
+ }
+ }
+
+ if (d < len) {
+ // Decode last 1-3 bytes (incl '=') into 1-3 bytes
+ int i = 0;
+ for (int j = 0; sIx <= eIx - pad; j++)
+ i |= IA[s.charAt(sIx++)] << (18 - j * 6);
+
+ for (int r = 16; d < len; r -= 8)
+ dArr[d++] = (byte) (i >> r);
+ }
+
+ return dArr;
+ }
+}
\ No newline at end of file