You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mime4j-dev@james.apache.org by ol...@apache.org on 2013/09/25 10:15:23 UTC

svn commit: r1526130 - in /james/mime4j/branches/apache-mime4j-0.7/core/src: main/java/org/apache/james/mime4j/codec/DecoderUtil.java test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java

Author: olegk
Date: Wed Sep 25 08:15:22 2013
New Revision: 1526130

URL: http://svn.apache.org/r1526130
Log:
MIME4J-211: Add an optional fallback charset argument to DecoderUtil.decodeEncodedWords
Contributed by TzeKai Lee <chikei at gmail.com>

Modified:
    james/mime4j/branches/apache-mime4j-0.7/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java
    james/mime4j/branches/apache-mime4j-0.7/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java

Modified: james/mime4j/branches/apache-mime4j-0.7/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java
URL: http://svn.apache.org/viewvc/james/mime4j/branches/apache-mime4j-0.7/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java?rev=1526130&r1=1526129&r2=1526130&view=diff
==============================================================================
--- james/mime4j/branches/apache-mime4j-0.7/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java (original)
+++ james/mime4j/branches/apache-mime4j-0.7/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java Wed Sep 25 08:15:22 2013
@@ -142,6 +142,38 @@ public class DecoderUtil {
      * @throws IllegalArgumentException only if the DecodeMonitor strategy throws it (Strict parsing)
      */
     public static String decodeEncodedWords(String body, DecodeMonitor monitor) throws IllegalArgumentException {
+        return decodeEncodedWords(body, monitor, null);
+    }
+
+    /**
+     * Decodes a string containing encoded words as defined by RFC 2047. Encoded
+     * words have the form =?charset?enc?encoded-text?= where enc is either 'Q'
+     * or 'q' for quoted-printable and 'B' or 'b' for base64. The fallback
+     * charset is used when an encoded word names a charset that cannot be resolved.
+     *
+     * @param body the string to decode
+     * @param fallback the fallback Charset to be used.
+     * @return the decoded string.
+     * @throws IllegalArgumentException only if the DecodeMonitor strategy throws it (Strict parsing)
+     */
+    public static String decodeEncodedWords(String body, Charset fallback) throws IllegalArgumentException {
+        return decodeEncodedWords(body, null, fallback);
+    }
+
+    /**
+     * Decodes a string containing encoded words as defined by RFC 2047. Encoded
+     * words have the form =?charset?enc?encoded-text?= where enc is either 'Q'
+     * or 'q' for quoted-printable and 'B' or 'b' for base64. The fallback
+     * charset is used when an encoded word names a charset that cannot be resolved.
+     *
+     * @param body the string to decode
+     * @param monitor the DecodeMonitor to be used.
+     * @param fallback the fallback Charset to be used.
+     * @return the decoded string.
+     * @throws IllegalArgumentException only if the DecodeMonitor strategy throws it (Strict parsing)
+     */
+    public static String decodeEncodedWords(String body, DecodeMonitor monitor, Charset fallback)
+            throws IllegalArgumentException {
         int tailIndex = 0;
         boolean lastMatchValid = false;
 
@@ -154,7 +186,7 @@ public class DecoderUtil {
             String encodedText = matcher.group(4);
 
             String decoded = null;
-            decoded = tryDecodeEncodedWord(mimeCharset, encoding, encodedText, monitor);
+            decoded = tryDecodeEncodedWord(mimeCharset, encoding, encodedText, monitor, fallback);
             if (decoded == null) {
                 sb.append(matcher.group(0));
             } else {
@@ -178,12 +210,16 @@ public class DecoderUtil {
 
     // return null on error
     private static String tryDecodeEncodedWord(final String mimeCharset,
-            final String encoding, final String encodedText, final DecodeMonitor monitor) {
+            final String encoding, final String encodedText, final DecodeMonitor monitor, final Charset fallback) {
         Charset charset = CharsetUtil.lookup(mimeCharset);
         if (charset == null) {
-            monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
-                    "Mime charser '", mimeCharset, "' doesn't have a corresponding Java charset");
-            return null;
+            if(fallback == null) {
+                monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
+                        "Mime charser '", mimeCharset, "' doesn't have a corresponding Java charset");
+                return null;
+            } else {
+                charset = fallback;
+            }
         }
 
         if (encodedText.length() == 0) {

Modified: james/mime4j/branches/apache-mime4j-0.7/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java
URL: http://svn.apache.org/viewvc/james/mime4j/branches/apache-mime4j-0.7/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java?rev=1526130&r1=1526129&r2=1526130&view=diff
==============================================================================
--- james/mime4j/branches/apache-mime4j-0.7/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java (original)
+++ james/mime4j/branches/apache-mime4j-0.7/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java Wed Sep 25 08:15:22 2013
@@ -20,6 +20,7 @@
 package org.apache.james.mime4j.codec;
 
 import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
 
 import junit.framework.TestCase;
 
@@ -67,6 +68,18 @@ public class DecoderUtilTest extends Tes
                 + "\u30B8\u30CD\u30B9\u306E\u6C7A\u5B9A\u7248\u3067\u3059\uFF01", dec);
     }
 
+    public void testDecodeJapaneseEncodedWordsWithFallback(){
+        String enc = "=?random?B?GyRCTCQbKEobJEI+NRsoShskQkJ6GyhKGyRCOS0bKEo=?= "
+                + "=?garbage?B?GyRCOXAbKEobJEIiKBsoShskQiU1GyhKGyRCJSQbKEo=?= "
+                + "=?charset?B?GyRCJUkbKEobJEIlUxsoShskQiU4GyhKGyRCJU0bKEo=?= "
+                + "=?name?B?GyRCJTkbKEobJEIkThsoShskQjdoGyhKGyRCRGobKEo=?= "
+                + "=?trash?B?GyRCSEcbKEobJEIkRxsoShskQiQ5GyhKGyRCISobKEo=?=";
+
+        String dec = DecoderUtil.decodeEncodedWords(enc, Charset.forName("ISO-2022-JP"));
+        assertEquals("\u672A\u627F\u8AFE\u5E83\u544A\u203B\u30B5\u30A4\u30C9\u30D3"
+                + "\u30B8\u30CD\u30B9\u306E\u6C7A\u5B9A\u7248\u3067\u3059\uFF01", dec);
+    }
+
     public void testInvalidEncodedWordsAreIgnored() {
         assertEquals("=?iso8859-1?Q?=", DecoderUtil.decodeEncodedWords("=?iso8859-1?Q?="));
         assertEquals("=?iso8859-1?b?=", DecoderUtil.decodeEncodedWords("=?iso8859-1?b?="));