You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by gg...@apache.org on 2012/03/07 16:34:02 UTC
svn commit: r1297993 - in /commons/proper/codec/trunk/src:
changes/changes.xml
main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java
test/java/org/apache/commons/codec/net/QuotedPrintableCodecTest.java
Author: ggregory
Date: Wed Mar 7 15:34:01 2012
New Revision: 1297993
URL: http://svn.apache.org/viewvc?rev=1297993&view=rev
Log:
[CODEC-121] QuotedPrintableCodec does not support soft line break per the 'quoted-printable' example on Wikipedia. Apply patch with minor modifications.
Modified:
commons/proper/codec/trunk/src/changes/changes.xml
commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java
commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/QuotedPrintableCodecTest.java
Modified: commons/proper/codec/trunk/src/changes/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/changes/changes.xml?rev=1297993&r1=1297992&r2=1297993&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/changes/changes.xml (original)
+++ commons/proper/codec/trunk/src/changes/changes.xml Wed Mar 7 15:34:01 2012
@@ -25,6 +25,11 @@
<!-- <release version="2.0" date="TBA" description="Feature and fix release."> <action dev="ggregory" type="fix" issue="CODEC-126"> Make
org.apache.commons.codec.net.URLCodec charset field final. </action> </release>
-->
+ <release version="1.6.1" date="TBD" description="Feature and fix release.">
+ <action dev="ggregory" type="fix" issue="CODEC-121" due-to="javajohn">
+ QuotedPrintableCodec does not support soft line break per the 'quoted-printable' example on Wikipedia
+ </action>
+ </release>
<release version="1.6" date="20 November 2011" description="Feature and fix release.">
<action dev="ggregory" type="fix" issue="CODEC-129" due-to="ggregory">
Use standard Maven directory layout.
Modified: commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java?rev=1297993&r1=1297992&r2=1297993&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java (original)
+++ commons/proper/codec/trunk/src/main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java Wed Mar 7 15:34:01 2012
@@ -42,20 +42,10 @@ import org.apache.commons.codec.binary.S
* to ensure the integrity of the data should the message pass through a character- translating, and/or line-wrapping
* gateway.
* </p>
- *
- * <p>
- * Note:
- * </p>
- * <p>
- * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec
- * does not lend itself well into the byte[] oriented codec framework. Complete the codec once the streamable codec
- * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy
- * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec.
- * </p>
- *
+ *
* @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
* Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
- *
+ *
* @author Apache Software Foundation
* @since 1.3
* @version $Id$
@@ -76,6 +66,14 @@ public class QuotedPrintableCodec implem
private static final byte TAB = 9;
private static final byte SPACE = 32;
+
+ private static final byte CR = 13;
+
+ private static final byte LF = 10;
+
+ /** Safe line length for quoted printable encoded text. */
+ private static final int SAFE_LENGTH = 73;
+
// Static initializer for printable chars collection
static {
// alpha characters
@@ -111,26 +109,78 @@ public class QuotedPrintableCodec implem
* Encodes byte into its quoted-printable representation.
*
* @param b
- * byte to encode
+ * byte to encode
* @param buffer
- * the buffer to write to
+ * the buffer to write to
+ * @return The number of bytes written to the <code>buffer</code>
*/
- private static final void encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) {
+ private static final int encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) {
buffer.write(ESCAPE_CHAR);
char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
buffer.write(hex1);
buffer.write(hex2);
+ return 3;
+ }
+
+ /**
+ * Return the byte at position <code>index</code> of the byte array and
+ * make sure it is unsigned.
+ *
+ * @param index
+ * position in the array
+ * @param bytes
+ * the byte array
+ * @return the unsigned octet at position <code>index</code> from the array
+ */
+ private static int getUnsignedOctet(final int index, final byte[] bytes) {
+ int b = bytes[index];
+ if (b < 0) {
+ b = 256 + b;
+ }
+ return b;
+ }
+
+ /**
+ * Write a byte to the buffer.
+ *
+ * @param b
+ * byte to write
+ * @param encode
+ * indicates whether the octet shall be encoded
+ * @param buffer
+ * the buffer to write to
+ * @return the number of bytes that have been written to the buffer
+ */
+ private static int encodeByte(final int b, final boolean encode,
+ final ByteArrayOutputStream buffer) {
+ if (encode) {
+ return encodeQuotedPrintable(b, buffer);
+ } else {
+ buffer.write(b);
+ return 1;
+ }
+ }
+
+ /**
+ * Checks whether the given byte is whitespace.
+ *
+ * @param b
+ * byte to be checked
+ * @return <code>true</code> if the byte is either a space or tab character
+ */
+ private static boolean isWhitespace(final int b) {
+ return b == SPACE || b == TAB;
}
/**
* Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
- *
+ *
* <p>
- * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
- * RFC 1521 and is suitable for encoding binary data and unformatted text.
+ * This function fully implements the quoted-printable encoding specification (rule #1 through rule #5)
+ * as defined in RFC 1521 and is suitable for encoding binary data and unformatted text.
* </p>
- *
+ *
* @param printable
* bitset of characters deemed quoted-printable
* @param bytes
@@ -145,29 +195,59 @@ public class QuotedPrintableCodec implem
printable = PRINTABLE_CHARS;
}
ByteArrayOutputStream buffer = new ByteArrayOutputStream();
- for (byte c : bytes) {
- int b = c;
- if (b < 0) {
- b = 256 + b;
- }
- if (printable.get(b)) {
- buffer.write(b);
+ int pos = 1;
+ // encode up to bytes.length - 3, the last three octets will be treated
+ // separately for simplification of note #3
+ for (int i = 0; i < bytes.length - 3; i++) {
+ int b = getUnsignedOctet(i, bytes);
+ if (pos < SAFE_LENGTH) {
+ // up to this length it is safe to add any byte, encoded or not
+ pos += encodeByte(b, !printable.get(b), buffer);
} else {
- encodeQuotedPrintable(b, buffer);
+ // rule #3: whitespace at the end of a line *must* be encoded
+ encodeByte(b, !printable.get(b) || isWhitespace(b), buffer);
+
+ // rule #5: soft line break
+ buffer.write(ESCAPE_CHAR);
+ buffer.write(CR);
+ buffer.write(LF);
+ pos = 1;
}
}
+
+ // rule #3: whitespace at the end of a line *must* be encoded
+ // if we would do a soft line break after this octet, encode whitespace
+ int b = getUnsignedOctet(bytes.length - 3, bytes);
+ boolean encode = !printable.get(b) || (isWhitespace(b) && pos > SAFE_LENGTH - 5);
+ pos += encodeByte(b, encode, buffer);
+
+ // note #3: '=' *must not* be the ultimate or penultimate character
+ // simplification: if < 6 bytes left, do a soft line break as we may need
+ // exactly 6 bytes space for the last 2 bytes
+ if (pos > SAFE_LENGTH - 2) {
+ buffer.write(ESCAPE_CHAR);
+ buffer.write(CR);
+ buffer.write(LF);
+ }
+ for (int i = bytes.length - 2; i < bytes.length; i++) {
+ b = getUnsignedOctet(i, bytes);
+ // rule #3: trailing whitespace shall be encoded
+ encode = !printable.get(b) || (i > bytes.length - 2 && isWhitespace(b));
+ encodeByte(b, encode, buffer);
+ }
+
return buffer.toByteArray();
}
/**
- * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted
- * back to their original representation.
- *
+ * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are
+ * converted back to their original representation.
+ *
* <p>
- * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
- * RFC 1521.
+ * This function fully implements the quoted-printable encoding specification (rule #1 through rule #5) as
+ * defined in RFC 1521.
* </p>
- *
+ *
* @param bytes
* array of quoted-printable characters
* @return array of original bytes
@@ -180,16 +260,21 @@ public class QuotedPrintableCodec implem
}
ByteArrayOutputStream buffer = new ByteArrayOutputStream();
for (int i = 0; i < bytes.length; i++) {
- int b = bytes[i];
+ final int b = bytes[i];
if (b == ESCAPE_CHAR) {
try {
- int u = Utils.digit16(bytes[++i]);
+ // if the next octet is a CR we have found a soft line break
+ if (bytes[++i] == CR) {
+ continue;
+ }
+ int u = Utils.digit16(bytes[i]);
int l = Utils.digit16(bytes[++i]);
buffer.write((char) ((u << 4) + l));
} catch (ArrayIndexOutOfBoundsException e) {
throw new DecoderException("Invalid quoted-printable encoding", e);
}
- } else {
+ } else if (b != CR && b != LF) {
+ // every other octet is appended except for CR & LF
buffer.write(b);
}
}
@@ -200,8 +285,8 @@ public class QuotedPrintableCodec implem
* Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
*
* <p>
- * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
- * RFC 1521 and is suitable for encoding binary data and unformatted text.
+ * This function fully implements the quoted-printable encoding specification (rule #1 through rule #5)
+ * as defined in RFC 1521 and is suitable for encoding binary data and unformatted text.
* </p>
*
* @param bytes
@@ -217,8 +302,8 @@ public class QuotedPrintableCodec implem
* back to their original representation.
*
* <p>
- * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
- * RFC 1521.
+ * This function fully implements the quoted-printable encoding specification (rule #1 through rule #5)
+ * as defined in RFC 1521.
* </p>
*
* @param bytes
@@ -235,8 +320,8 @@ public class QuotedPrintableCodec implem
* Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
*
* <p>
- * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
- * RFC 1521 and is suitable for encoding binary data.
+ * This function fully implements the quoted-printable encoding specification (rule #1 through rule #5)
+ * as defined in RFC 1521 and is suitable for encoding binary data.
* </p>
*
* @param pString
@@ -365,8 +450,8 @@ public class QuotedPrintableCodec implem
* Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
*
* <p>
- * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
- * RFC 1521 and is suitable for encoding binary data and unformatted text.
+ * This function fully implements the quoted-printable encoding specification (rule #1 through rule #5)
+ * as defined in RFC 1521 and is suitable for encoding binary data and unformatted text.
* </p>
*
* @param pString
Modified: commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/QuotedPrintableCodecTest.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/QuotedPrintableCodecTest.java?rev=1297993&r1=1297992&r2=1297993&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/QuotedPrintableCodecTest.java (original)
+++ commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/net/QuotedPrintableCodecTest.java Wed Mar 7 15:34:01 2012
@@ -24,7 +24,6 @@ import static org.junit.Assert.fail;
import org.apache.commons.codec.CharEncoding;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;
-import org.junit.Ignore;
import org.junit.Test;
/**
@@ -265,30 +264,92 @@ public class QuotedPrintableCodecTest {
}
@Test
- @Ignore
- /**
- * The QuotedPrintableCodec documentation states that this is not supported.
- *
- * @throws Exception
- * @see <a href="https://issues.apache.org/jira/browse/CODEC-121">CODEC-121</a>
- */
public void testSoftLineBreakDecode() throws Exception {
- String qpdata = "If you believe that truth=3Dbeauty, then surely=20=\r\nmathematics is the most beautiful branch of philosophy.";
- String expected = "If you believe that truth=beauty, then surely mathematics is the most beautiful branch of philosophy.";
- assertEquals(expected, new QuotedPrintableCodec().decode(qpdata));
+ String qpdata = "If you believe that truth=3Dbeauty, then surely=20=\r\nmathematics " +
+ "is the most beautiful branch of philosophy.";
+ String expected = "If you believe that truth=beauty, then surely mathematics " +
+ "is the most beautiful branch of philosophy.";
+
+ QuotedPrintableCodec qpcodec = new QuotedPrintableCodec();
+ assertEquals(expected, qpcodec.decode(qpdata));
+
+ String encoded = qpcodec.encode(expected);
+ assertEquals(expected, qpcodec.decode(encoded));
}
@Test
- @Ignore
- /**
- * The QuotedPrintableCodec documentation states that this is not supported.
- *
- * @throws Exception
- * @see <a href="https://issues.apache.org/jira/browse/CODEC-121">CODEC-121</a>
- */
public void testSoftLineBreakEncode() throws Exception {
- String qpdata = "If you believe that truth=3Dbeauty, then surely=20=\r\nmathematics is the most beautiful branch of philosophy.";
- String expected = "If you believe that truth=beauty, then surely mathematics is the most beautiful branch of philosophy.";
- assertEquals(qpdata, new QuotedPrintableCodec().encode(expected));
+ String qpdata = "If you believe that truth=3Dbeauty, then surely mathematics is the most " +
+ "b=\r\neautiful branch of philosophy.";
+ String expected = "If you believe that truth=beauty, then surely mathematics is the most " +
+ "beautiful branch of philosophy.";
+
+ QuotedPrintableCodec qpcodec = new QuotedPrintableCodec();
+ assertEquals(qpdata, qpcodec.encode(expected));
+
+ String decoded = qpcodec.decode(qpdata);
+ assertEquals(qpdata, qpcodec.encode(decoded));
+ }
+
+ @Test
+ public void testSkipNotEncodedCRLF() throws Exception {
+ String qpdata = "CRLF in an\n encoded text should be=20=\r\n\rskipped in the\r decoding.";
+ String expected = "CRLF in an encoded text should be skipped in the decoding.";
+
+ QuotedPrintableCodec qpcodec = new QuotedPrintableCodec();
+ assertEquals(expected, qpcodec.decode(qpdata));
+
+ String encoded = qpcodec.encode(expected);
+ assertEquals(expected, qpcodec.decode(encoded));
+ }
+
+ @Test
+ public void testTrailingSpecial() throws Exception {
+ final QuotedPrintableCodec qpcodec = new QuotedPrintableCodec();
+
+ String plain ="This is a example of a quoted-printable text file. This might contain sp=cial chars.";
+ String expected = "This is a example of a quoted-printable text file. This might contain sp=3D=\r\ncial chars.";
+ assertEquals(expected, qpcodec.encode(plain));
+
+ plain ="This is a example of a quoted-printable text file. This might contain ta\tbs as well.";
+ expected = "This is a example of a quoted-printable text file. This might contain ta=09=\r\nbs as well.";
+ assertEquals(expected, qpcodec.encode(plain));
}
+
+ @Test
+ public void testUltimateSoftBreak() throws Exception {
+ final QuotedPrintableCodec qpcodec = new QuotedPrintableCodec();
+
+ String plain ="This is a example of a quoted-printable text file. There is no end to it\t";
+ String expected = "This is a example of a quoted-printable text file. There is no end to i=\r\nt=09";
+
+ assertEquals(expected, qpcodec.encode(plain));
+
+ plain ="This is a example of a quoted-printable text file. There is no end to it ";
+ expected = "This is a example of a quoted-printable text file. There is no end to i=\r\nt=20";
+
+ assertEquals(expected, qpcodec.encode(plain));
+
+ // whitespace before soft break
+ plain ="This is a example of a quoted-printable text file. There is no end to ";
+ expected = "This is a example of a quoted-printable text file. There is no end to=20=\r\n =20";
+
+ assertEquals(expected, qpcodec.encode(plain));
+
+ // non-printable character before soft break
+ plain ="This is a example of a quoted-printable text file. There is no end to= ";
+ expected = "This is a example of a quoted-printable text file. There is no end to=3D=\r\n =20";
+
+ assertEquals(expected, qpcodec.encode(plain));
+ }
+
+ @Test
+ public void testFinalBytes() throws Exception {
+ // whitespace, but does not need to be encoded
+ String plain ="This is a example of a quoted=printable text file. There is no tt";
+ String expected = "This is a example of a quoted=3Dprintable text file. There is no tt";
+
+ assertEquals(expected, new QuotedPrintableCodec().encode(plain));
+ }
+
}