You are viewing a plain text version of this content. The hyperlink to the canonical version was not preserved in this plain-text copy.
Posted to mime4j-dev@james.apache.org by mw...@apache.org on 2009/08/29 23:33:14 UTC
svn commit: r809204 - in /james/mime4j/trunk/core/src:
main/java/org/apache/james/mime4j/codec/DecoderUtil.java
test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java
Author: mwiederkehr
Date: Sat Aug 29 21:33:13 2009
New Revision: 809204
URL: http://svn.apache.org/viewvc?rev=809204&view=rev
Log:
use regular expression to match encoded words (MIME4J-138)
Modified:
james/mime4j/trunk/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java
james/mime4j/trunk/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java
Modified: james/mime4j/trunk/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java
URL: http://svn.apache.org/viewvc/james/mime4j/trunk/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java?rev=809204&r1=809203&r2=809204&view=diff
==============================================================================
--- james/mime4j/trunk/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java (original)
+++ james/mime4j/trunk/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java Sat Aug 29 21:33:13 2009
@@ -23,6 +23,8 @@
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -33,7 +35,10 @@
*/
public class DecoderUtil {
private static Log log = LogFactory.getLog(DecoderUtil.class);
-
+
+ private static final Pattern PATTERN_ENCODED_WORD = Pattern.compile(
+ "(.*?)=\\?([^\\?]+?)\\?(\\w)\\?([^\\?]+?)\\?=", Pattern.DOTALL);
+
/**
* Decodes a string containing quoted-printable encoded data.
*
@@ -123,71 +128,57 @@
byte[] decodedBytes = decodeQuotedPrintable(encodedText);
return new String(decodedBytes, charset);
}
-
+
/**
- * Decodes a string containing encoded words as defined by RFC 2047.
- * Encoded words in have the form
- * =?charset?enc?Encoded word?= where enc is either 'Q' or 'q' for
- * quoted-printable and 'B' or 'b' for Base64.
+ * Decodes a string containing encoded words as defined by RFC 2047. Encoded
+ * words have the form =?charset?enc?encoded-text?= where enc is either 'Q'
+ * or 'q' for quoted-printable and 'B' or 'b' for base64.
*
* @param body the string to decode.
* @return the decoded string.
*/
public static String decodeEncodedWords(String body) {
- int previousEnd = 0;
- boolean previousWasEncoded = false;
+ int tailIndex = 0;
+ boolean lastMatchValid = false;
StringBuilder sb = new StringBuilder();
- while (true) {
- int begin = body.indexOf("=?", previousEnd);
- int end = begin == -1 ? -1 : body.indexOf("?=", begin + 2);
- if (end == -1) {
- if (previousEnd == 0)
- return body;
+ for (Matcher matcher = PATTERN_ENCODED_WORD.matcher(body); matcher.find();) {
+ String separator = matcher.group(1);
+ String mimeCharset = matcher.group(2);
+ String encoding = matcher.group(3);
+ String encodedText = matcher.group(4);
- sb.append(body.substring(previousEnd));
- return sb.toString();
- }
- end += 2;
-
- String sep = body.substring(previousEnd, begin);
-
- String decoded = decodeEncodedWord(body, begin, end);
+ String decoded = tryDecodeEncodedWord(mimeCharset, encoding, encodedText);
if (decoded == null) {
- sb.append(sep);
- sb.append(body.substring(begin, end));
+ sb.append(matcher.group(0));
} else {
- if (!previousWasEncoded || !CharsetUtil.isWhitespace(sep)) {
- sb.append(sep);
+ if (!lastMatchValid || !CharsetUtil.isWhitespace(separator)) {
+ sb.append(separator);
}
sb.append(decoded);
}
- previousEnd = end;
- previousWasEncoded = decoded != null;
+ tailIndex = matcher.end();
+ lastMatchValid = decoded != null;
+ }
+
+ if (tailIndex == 0) {
+ return body;
+ } else {
+ sb.append(body.substring(tailIndex));
+ return sb.toString();
}
}
// return null on error
- private static String decodeEncodedWord(String body, int begin, int end) {
- int qm1 = body.indexOf('?', begin + 2);
- if (qm1 == end - 2)
- return null;
-
- int qm2 = body.indexOf('?', qm1 + 1);
- if (qm2 == end - 2)
- return null;
-
- String mimeCharset = body.substring(begin + 2, qm1);
- String encoding = body.substring(qm1 + 1, qm2);
- String encodedText = body.substring(qm2 + 1, end - 2);
-
+ private static String tryDecodeEncodedWord(final String mimeCharset,
+ final String encoding, final String encodedText) {
String charset = CharsetUtil.toJavaCharset(mimeCharset);
if (charset == null) {
if (log.isWarnEnabled()) {
log.warn("MIME charset '" + mimeCharset + "' in encoded word '"
- + body.substring(begin, end) + "' doesn't have a "
+ + recombine(mimeCharset, encoding, encodedText) + "' doesn't have a "
+ "corresponding Java charset");
}
return null;
@@ -195,7 +186,7 @@
if (log.isWarnEnabled()) {
log.warn("Current JDK doesn't support decoding of charset '"
+ charset + "' (MIME charset '" + mimeCharset
- + "' in encoded word '" + body.substring(begin, end)
+ + "' in encoded word '" + recombine(mimeCharset, encoding, encodedText)
+ "')");
}
return null;
@@ -204,7 +195,7 @@
if (encodedText.length() == 0) {
if (log.isWarnEnabled()) {
log.warn("Missing encoded text in encoded word: '"
- + body.substring(begin, end) + "'");
+ + recombine(mimeCharset, encoding, encodedText) + "'");
}
return null;
}
@@ -217,7 +208,7 @@
} else {
if (log.isWarnEnabled()) {
log.warn("Warning: Unknown encoding in encoded word '"
- + body.substring(begin, end) + "'");
+ + recombine(mimeCharset, encoding, encodedText) + "'");
}
return null;
}
@@ -225,18 +216,23 @@
// should not happen because of isDecodingSupported check above
if (log.isWarnEnabled()) {
log.warn("Unsupported encoding in encoded word '"
- + body.substring(begin, end) + "'", e);
+ + recombine(mimeCharset, encoding, encodedText) + "'", e);
}
return null;
} catch (RuntimeException e) {
if (log.isWarnEnabled()) {
log.warn("Could not decode encoded word '"
- + body.substring(begin, end) + "'", e);
+ + recombine(mimeCharset, encoding, encodedText) + "'", e);
}
return null;
}
}
+ private static String recombine(final String mimeCharset,
+ final String encoding, final String encodedText) {
+ return "=?" + mimeCharset + "?" + encoding + "?" + encodedText + "?=";
+ }
+
// Replace _ with =20
private static String replaceUnderscores(String str) {
// probably faster than String#replace(CharSequence, CharSequence)
Modified: james/mime4j/trunk/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java
URL: http://svn.apache.org/viewvc/james/mime4j/trunk/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java?rev=809204&r1=809203&r2=809204&view=diff
==============================================================================
--- james/mime4j/trunk/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java (original)
+++ james/mime4j/trunk/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java Sat Aug 29 21:33:13 2009
@@ -100,6 +100,16 @@
assertEquals("a b", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?="));
}
+ // see MIME4J-138
+ public void testEncodedTextMayStartWithAnEqualsSign() {
+ assertEquals(" foo", DecoderUtil.decodeEncodedWords("=?utf-8?Q?=20foo?="));
+ assertEquals("Re: How to place a view at the bottom with a 100% width",
+ DecoderUtil.decodeEncodedWords("=?utf-8?Q?Re:=20How=20to=20place=20a=20view=20at=20the=20bottom?= "
+ + "=?utf-8?Q?=20with=20a=20100%=20width?="));
+ assertEquals("Test \u00fc and more",
+ DecoderUtil.decodeEncodedWords("Test =?ISO-8859-1?Q?=FC_?= =?ISO-8859-1?Q?and_more?="));
+ }
+
public void testNonWhiteSpaceBetweenEncodedWordsIsRetained() {
assertEquals("a b c", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?= b =?ISO-8859-1?Q?c?="));
assertEquals("a\rb\nc", DecoderUtil.decodeEncodedWords("=?ISO-8859-1?Q?a?=\rb\n=?ISO-8859-1?Q?c?="));