You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by gg...@apache.org on 2012/03/07 16:34:02 UTC

svn commit: r1297993 - in /commons/proper/codec/trunk/src: changes/changes.xml main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java test/java/org/apache/commons/codec/net/QuotedPrintableCodecTest.java

Author: ggregory
Date: Wed Mar  7 15:34:01 2012
New Revision: 1297993

URL: http://svn.apache.org/viewvc?rev=1297993&view=rev
Log:
[CODEC-121] QuotedPrintableCodec does not support soft line break per the 'quoted-printable' example on Wikipedia. Apply patch with minor modifications.

Modified:
    commons/proper/codec/trunk/src/changes/changes.xml
    commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java
    commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/QuotedPrintableCodecTest.java

Modified: commons/proper/codec/trunk/src/changes/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/changes/changes.xml?rev=1297993&r1=1297992&r2=1297993&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/changes/changes.xml (original)
+++ commons/proper/codec/trunk/src/changes/changes.xml Wed Mar  7 15:34:01 2012
@@ -25,6 +25,11 @@
     <!-- <release version="2.0" date="TBA" description="Feature and fix release."> <action dev="ggregory" type="fix" issue="CODEC-126"> Make 
       org.apache.commons.codec.net.URLCodec charset field final. </action>   </release>
     -->
+    <release version="1.6.1" date="TBD" description="Feature and fix release.">
+      <action dev="ggregory" type="fix" issue="CODEC-121" due-to="javajohn">
+        QuotedPrintableCodec does not support soft line break per the 'quoted-printable' example on Wikipedia
+      </action>
+    </release>
     <release version="1.6" date="20 November 2011" description="Feature and fix release.">
       <action dev="ggregory" type="fix" issue="CODEC-129" due-to="ggregory">
         Use standard Maven directory layout.

Modified: commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java?rev=1297993&r1=1297992&r2=1297993&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java (original)
+++ commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java Wed Mar  7 15:34:01 2012
@@ -42,20 +42,10 @@ import org.apache.commons.codec.binary.S
  * to ensure the integrity of the data should the message pass through a character- translating, and/or line-wrapping
  * gateway.
  * </p>
- * 
- * <p>
- * Note:
- * </p>
- * <p>
- * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec
- * does not lend itself well into the byte[] oriented codec framework. Complete the codec once the streamable codec
- * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy
- * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec.
- * </p>
- * 
+ *
  * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
  *          Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
- * 
+ *
  * @author Apache Software Foundation
  * @since 1.3
  * @version $Id$
@@ -76,6 +66,14 @@ public class QuotedPrintableCodec implem
     private static final byte TAB = 9;
 
     private static final byte SPACE = 32;
+
+    private static final byte CR = 13;
+
+    private static final byte LF = 10;
+
+    /** Safe line length for quoted printable encoded text. */
+    private static final int SAFE_LENGTH = 73;
+
     // Static initializer for printable chars collection
     static {
         // alpha characters
@@ -111,26 +109,78 @@ public class QuotedPrintableCodec implem
      * Encodes byte into its quoted-printable representation.
      * 
      * @param b
-     *                  byte to encode
+     *            byte to encode
      * @param buffer
-     *                  the buffer to write to
+     *            the buffer to write to
+     * @return The number of bytes written to the <code>buffer</code>
      */
-    private static final void encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) {
+    private static final int encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) {
         buffer.write(ESCAPE_CHAR);
         char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
         char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
         buffer.write(hex1);
         buffer.write(hex2);
+        return 3;
+    }
+
+    /**
+     * Return the byte at position <code>index</code> of the byte array and
+     * make sure it is unsigned.
+     *
+     * @param index
+     *                  position in the array
+     * @param bytes
+     *                  the byte array
+     * @return the unsigned octet at position <code>index</code> from the array
+     */
+    private static int getUnsignedOctet(final int index, final byte[] bytes) {
+        int b = bytes[index];
+        if (b < 0) {
+            b = 256 + b;
+        }
+        return b;
+    }
+
+    /**
+     * Write a byte to the buffer.
+     *
+     * @param b
+     *                  byte to write
+     * @param encode
+     *                  indicates whether the octet shall be encoded
+     * @param buffer
+     *                  the buffer to write to
+     * @return the number of bytes that have been written to the buffer
+     */
+    private static int encodeByte(final int b, final boolean encode,
+                                  final ByteArrayOutputStream buffer) {
+        if (encode) {
+            return encodeQuotedPrintable(b, buffer);
+        } else {
+            buffer.write(b);
+            return 1;
+        }
+    }
+
+    /**
+     * Checks whether the given byte is whitespace.
+     *
+     * @param b
+     *                  byte to be checked
+     * @return <code>true</code> if the byte is either a space or tab character
+     */
+    private static boolean isWhitespace(final int b) {
+        return b == SPACE || b == TAB;
     }
 
     /**
      * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
-     * 
+     *
      * <p>
-     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
-     * RFC 1521 and is suitable for encoding binary data and unformatted text.
+     * This function fully implements the quoted-printable encoding specification (rule #1 through rule #5)
+     * as defined in RFC 1521 and is suitable for encoding binary data and unformatted text.
      * </p>
-     * 
+     *
      * @param printable
      *                  bitset of characters deemed quoted-printable
      * @param bytes
@@ -145,29 +195,59 @@ public class QuotedPrintableCodec implem
             printable = PRINTABLE_CHARS;
         }
         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
-        for (byte c : bytes) {
-            int b = c;
-            if (b < 0) {
-                b = 256 + b;
-            }
-            if (printable.get(b)) {
-                buffer.write(b);
+        int pos = 1;
+        // encode up to bytes.length - 3, the last three octets will be treated
+        // separately for simplification of note #3
+        for (int i = 0; i < bytes.length - 3; i++) {
+            int b = getUnsignedOctet(i, bytes);
+            if (pos < SAFE_LENGTH) {
+                // up to this length it is safe to add any byte, encoded or not
+                pos += encodeByte(b, !printable.get(b), buffer);
             } else {
-                encodeQuotedPrintable(b, buffer);
+                // rule #3: whitespace at the end of a line *must* be encoded
+                encodeByte(b, !printable.get(b) || isWhitespace(b), buffer);
+
+                // rule #5: soft line break
+                buffer.write(ESCAPE_CHAR);
+                buffer.write(CR);
+                buffer.write(LF);
+                pos = 1;
             }
         }
+
+        // rule #3: whitespace at the end of a line *must* be encoded
+        // if a soft line break would follow this octet, encode whitespace
+        int b = getUnsignedOctet(bytes.length - 3, bytes);
+        boolean encode = !printable.get(b) || (isWhitespace(b) && pos > SAFE_LENGTH - 5);
+        pos += encodeByte(b, encode, buffer);
+
+        // note #3: '=' *must not* be the ultimate or penultimate character
+        // simplification: if < 6 bytes left, do a soft line break as we may need
+        //                 exactly 6 bytes space for the last 2 bytes
+        if (pos > SAFE_LENGTH - 2) {
+            buffer.write(ESCAPE_CHAR);
+            buffer.write(CR);
+            buffer.write(LF);
+        }
+        for (int i = bytes.length - 2; i < bytes.length; i++) {
+            b = getUnsignedOctet(i, bytes);
+            // rule #3: trailing whitespace shall be encoded
+            encode = !printable.get(b) || (i > bytes.length - 2 && isWhitespace(b));
+            encodeByte(b, encode, buffer);
+        }
+
         return buffer.toByteArray();
     }
 
     /**
-     * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted
-     * back to their original representation.
-     * 
+     * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are
+     * converted back to their original representation.
+     *
      * <p>
-     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
-     * RFC 1521.
+     * This function fully implements the quoted-printable encoding specification (rule #1 through rule #5) as
+     * defined in RFC 1521.
      * </p>
-     * 
+     *
      * @param bytes
      *                  array of quoted-printable characters
      * @return array of original bytes
@@ -180,16 +260,21 @@ public class QuotedPrintableCodec implem
         }
         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
         for (int i = 0; i < bytes.length; i++) {
-            int b = bytes[i];
+            final int b = bytes[i];
             if (b == ESCAPE_CHAR) {
                 try {
-                    int u = Utils.digit16(bytes[++i]);
+                    // if the next octet is a CR we have found a soft line break
+                    if (bytes[++i] == CR) {
+                        continue;
+                    }
+                    int u = Utils.digit16(bytes[i]);
                     int l = Utils.digit16(bytes[++i]);
                     buffer.write((char) ((u << 4) + l));
                 } catch (ArrayIndexOutOfBoundsException e) {
                     throw new DecoderException("Invalid quoted-printable encoding", e);
                 }
-            } else {
+            } else if (b != CR && b != LF) {
+                // every other octet is appended except for CR & LF
                 buffer.write(b);
             }
         }
@@ -200,8 +285,8 @@ public class QuotedPrintableCodec implem
      * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
      * 
      * <p>
-     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
-     * RFC 1521 and is suitable for encoding binary data and unformatted text.
+     * This function fully implements the quoted-printable encoding specification (rule #1 through rule #5)
+     * as defined in RFC 1521 and is suitable for encoding binary data and unformatted text.
      * </p>
      * 
      * @param bytes
@@ -217,8 +302,8 @@ public class QuotedPrintableCodec implem
      * back to their original representation.
      * 
      * <p>
-     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
-     * RFC 1521.
+     * This function fully implements the quoted-printable encoding specification (rule #1 through rule #5)
+     * as defined in RFC 1521.
      * </p>
      * 
      * @param bytes
@@ -235,8 +320,8 @@ public class QuotedPrintableCodec implem
      * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
      * 
      * <p>
-     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
-     * RFC 1521 and is suitable for encoding binary data.
+     * This function fully implements the quoted-printable encoding specification (rule #1 through rule #5)
+     * as defined in RFC 1521 and is suitable for encoding binary data.
      * </p>
      * 
      * @param pString
@@ -365,8 +450,8 @@ public class QuotedPrintableCodec implem
      * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
      * 
      * <p>
-     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
-     * RFC 1521 and is suitable for encoding binary data and unformatted text.
+     * This function fully implements the quoted-printable encoding specification (rule #1 through rule #5)
+     * as defined in RFC 1521 and is suitable for encoding binary data and unformatted text.
      * </p>
      * 
      * @param pString

Modified: commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/QuotedPrintableCodecTest.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/QuotedPrintableCodecTest.java?rev=1297993&r1=1297992&r2=1297993&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/QuotedPrintableCodecTest.java (original)
+++ commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/QuotedPrintableCodecTest.java Wed Mar  7 15:34:01 2012
@@ -24,7 +24,6 @@ import static org.junit.Assert.fail;
 import org.apache.commons.codec.CharEncoding;
 import org.apache.commons.codec.DecoderException;
 import org.apache.commons.codec.EncoderException;
-import org.junit.Ignore;
 import org.junit.Test;
 
 /**
@@ -265,30 +264,92 @@ public class QuotedPrintableCodecTest {
     }
 
     @Test
-    @Ignore
-    /**
-     * The QuotedPrintableCodec documentation states that this is not supported.
-     *  
-     * @throws Exception
-     * @see <a href="https://issues.apache.org/jira/browse/CODEC-121">CODEC-121</a>
-     */
     public void testSoftLineBreakDecode() throws Exception {
-        String qpdata = "If you believe that truth=3Dbeauty, then surely=20=\r\nmathematics is the most beautiful branch of philosophy.";
-        String expected = "If you believe that truth=beauty, then surely mathematics is the most beautiful branch of philosophy.";
-        assertEquals(expected, new QuotedPrintableCodec().decode(qpdata));
+        String qpdata = "If you believe that truth=3Dbeauty, then surely=20=\r\nmathematics " +
+                "is the most beautiful branch of philosophy.";
+        String expected = "If you believe that truth=beauty, then surely mathematics " +
+                "is the most beautiful branch of philosophy.";
+
+        QuotedPrintableCodec qpcodec = new QuotedPrintableCodec();
+        assertEquals(expected, qpcodec.decode(qpdata));
+
+        String encoded = qpcodec.encode(expected);
+        assertEquals(expected, qpcodec.decode(encoded));
     }
 
     @Test
-    @Ignore
-    /**
-     * The QuotedPrintableCodec documentation states that this is not supported.
-     *  
-     * @throws Exception
-     * @see <a href="https://issues.apache.org/jira/browse/CODEC-121">CODEC-121</a>
-     */
     public void testSoftLineBreakEncode() throws Exception {
-        String qpdata = "If you believe that truth=3Dbeauty, then surely=20=\r\nmathematics is the most beautiful branch of philosophy.";
-        String expected = "If you believe that truth=beauty, then surely mathematics is the most beautiful branch of philosophy.";
-        assertEquals(qpdata, new QuotedPrintableCodec().encode(expected));
+        String qpdata = "If you believe that truth=3Dbeauty, then surely mathematics is the most " +
+                "b=\r\neautiful branch of philosophy.";
+        String expected = "If you believe that truth=beauty, then surely mathematics is the most " +
+                "beautiful branch of philosophy.";
+
+        QuotedPrintableCodec qpcodec = new QuotedPrintableCodec();
+        assertEquals(qpdata, qpcodec.encode(expected));
+
+        String decoded = qpcodec.decode(qpdata);
+        assertEquals(qpdata, qpcodec.encode(decoded));
+    }
+
+    @Test
+    public void testSkipNotEncodedCRLF() throws Exception {
+        String qpdata = "CRLF in an\n encoded text should be=20=\r\n\rskipped in the\r decoding.";
+        String expected = "CRLF in an encoded text should be skipped in the decoding.";
+
+        QuotedPrintableCodec qpcodec = new QuotedPrintableCodec();
+        assertEquals(expected, qpcodec.decode(qpdata));
+
+        String encoded = qpcodec.encode(expected);
+        assertEquals(expected, qpcodec.decode(encoded));
+    }
+
+    @Test
+    public void testTrailingSpecial() throws Exception {
+        final QuotedPrintableCodec qpcodec = new QuotedPrintableCodec();
+
+        String plain ="This is a example of a quoted-printable text file. This might contain sp=cial chars.";
+        String expected = "This is a example of a quoted-printable text file. This might contain sp=3D=\r\ncial chars.";
+        assertEquals(expected, qpcodec.encode(plain));
+
+        plain ="This is a example of a quoted-printable text file. This might contain ta\tbs as well.";
+        expected = "This is a example of a quoted-printable text file. This might contain ta=09=\r\nbs as well.";
+        assertEquals(expected, qpcodec.encode(plain));
     }
+
+    @Test
+    public void testUltimateSoftBreak() throws Exception {
+        final QuotedPrintableCodec qpcodec = new QuotedPrintableCodec();
+
+        String plain ="This is a example of a quoted-printable text file. There is no end to it\t";
+        String expected = "This is a example of a quoted-printable text file. There is no end to i=\r\nt=09";
+
+        assertEquals(expected, qpcodec.encode(plain));
+
+        plain ="This is a example of a quoted-printable text file. There is no end to it ";
+        expected = "This is a example of a quoted-printable text file. There is no end to i=\r\nt=20";
+
+        assertEquals(expected, qpcodec.encode(plain));
+
+        // whitespace before soft break
+        plain ="This is a example of a quoted-printable text file. There is no end to   ";
+        expected = "This is a example of a quoted-printable text file. There is no end to=20=\r\n =20";
+
+        assertEquals(expected, qpcodec.encode(plain));
+
+        // non-printable character before soft break
+        plain ="This is a example of a quoted-printable text file. There is no end to=  ";
+        expected = "This is a example of a quoted-printable text file. There is no end to=3D=\r\n =20";
+
+        assertEquals(expected, qpcodec.encode(plain));
+    }
+
+    @Test
+    public void testFinalBytes() throws Exception {
+        // whitespace, but does not need to be encoded
+        String plain ="This is a example of a quoted=printable text file. There is no tt";
+        String expected = "This is a example of a quoted=3Dprintable text file. There is no tt";
+
+        assertEquals(expected, new QuotedPrintableCodec().encode(plain));
+    }
+
 }