You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mime4j-dev@james.apache.org by ol...@apache.org on 2014/10/31 11:07:00 UTC

svn commit: r1635743 - in /james/mime4j/trunk: core/src/main/java/org/apache/james/mime4j/stream/ dom/src/main/java/org/apache/james/mime4j/message/ dom/src/test/java/org/apache/james/mime4j/dom/ dom/src/test/java/org/apache/james/mime4j/message/

Author: olegk
Date: Fri Oct 31 10:07:00 2014
New Revision: 1635743

URL: http://svn.apache.org/r1635743
Log:
MIME4J-218: Content-Type fallback character set
contributed by Wolfgang Fahl <wf at bitplan.com>

Added:
    james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/dom/MessageCharsetLenientTest.java   (with props)
Modified:
    james/mime4j/trunk/core/src/main/java/org/apache/james/mime4j/stream/DefaultFieldBuilder.java
    james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/BasicBodyFactory.java
    james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/DefaultMessageBuilder.java
    james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/MessageBuilder.java
    james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/BodyPartBuilderTest.java
    james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/EntityImplTest.java
    james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/MessageBuilderTest.java

Modified: james/mime4j/trunk/core/src/main/java/org/apache/james/mime4j/stream/DefaultFieldBuilder.java
URL: http://svn.apache.org/viewvc/james/mime4j/trunk/core/src/main/java/org/apache/james/mime4j/stream/DefaultFieldBuilder.java?rev=1635743&r1=1635742&r2=1635743&view=diff
==============================================================================
--- james/mime4j/trunk/core/src/main/java/org/apache/james/mime4j/stream/DefaultFieldBuilder.java (original)
+++ james/mime4j/trunk/core/src/main/java/org/apache/james/mime4j/stream/DefaultFieldBuilder.java Fri Oct 31 10:07:00 2014
@@ -60,7 +60,7 @@ public class DefaultFieldBuilder impleme
         }
         int len = line.length();
         if (this.maxlen > 0 && this.buf.length() + len >= this.maxlen) {
-            throw new MaxHeaderLengthLimitException("Maximum header length limit exceeded");
+            throw new MaxHeaderLengthLimitException("Maximum header length limit (" + this.maxlen + ") exceeded");
         }
         this.buf.append(line.buffer(), 0, line.length());
     }

Modified: james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/BasicBodyFactory.java
URL: http://svn.apache.org/viewvc/james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/BasicBodyFactory.java?rev=1635743&r1=1635742&r2=1635743&view=diff
==============================================================================
--- james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/BasicBodyFactory.java (original)
+++ james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/BasicBodyFactory.java Fri Oct 31 10:07:00 2014
@@ -26,6 +26,7 @@ import java.io.Reader;
 import java.io.StringReader;
 import java.io.UnsupportedEncodingException;
 import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
 import java.nio.charset.UnsupportedCharsetException;
 
 import org.apache.james.mime4j.Charsets;
@@ -42,12 +43,44 @@ public class BasicBodyFactory implements
 
     public static final BasicBodyFactory INSTANCE = new BasicBodyFactory();
 
-    private static Charset resolveCharset(final String mimeCharset) throws UnsupportedEncodingException {
-        try {
-            return mimeCharset != null ? Charset.forName(mimeCharset) : null;
-        } catch (UnsupportedCharsetException ex) {
-            throw new UnsupportedEncodingException(mimeCharset);
+    private final boolean lenient;
+
+    public BasicBodyFactory() {
+        this(true);
+    }
+
+    public BasicBodyFactory(final boolean lenient) {
+        this.lenient = lenient;
+    }
+
+    /**
+     * select the Charset for the given mimeCharset string
+     * 
+     *  if you need support for non standard or invalid mimeCharset specifications
+     *  you might want to create your own derived BodyFactory extending BasicBodyFactory and
+     *  overriding this method as suggested by:
+     *    https://issues.apache.org/jira/browse/MIME4J-218
+     *  
+     *  the default behavior is lenient, invalid mimeCharset specifications will return the defaultCharset
+     * 
+     *  @param mimeCharset - the string specification for a Charset e.g. "UTF-8"
+     *  @throws UnsupportedEncodingException if the mimeCharset is invalid or unsupported and this factory is not lenient
+     */ 
+    protected Charset resolveCharset(final String mimeCharset) throws UnsupportedEncodingException {
+        if (mimeCharset != null) {
+            try {
+                return Charset.forName(mimeCharset);
+            } catch (UnsupportedCharsetException ex) {
+                if (!lenient) {
+                    throw new UnsupportedEncodingException(mimeCharset);
+                }
+            } catch (IllegalCharsetNameException ex) {
+                if (!lenient) {
+                    throw new UnsupportedEncodingException(mimeCharset);
+                }
+            }
         }
+        return Charset.defaultCharset();
     }
 
     public TextBody textBody(final String text, final String mimeCharset) throws UnsupportedEncodingException {

Modified: james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/DefaultMessageBuilder.java
URL: http://svn.apache.org/viewvc/james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/DefaultMessageBuilder.java?rev=1635743&r1=1635742&r2=1635743&view=diff
==============================================================================
--- james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/DefaultMessageBuilder.java (original)
+++ james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/DefaultMessageBuilder.java Fri Oct 31 10:07:00 2014
@@ -302,7 +302,7 @@ public class DefaultMessageBuilder imple
             BodyDescriptorBuilder bdb = bodyDescBuilder != null ? bodyDescBuilder :
                 new DefaultBodyDescriptorBuilder(null, fieldParser != null ? fieldParser :
                     strict ? DefaultFieldParser.getParser() : LenientFieldParser.getParser(), mon);
-            BodyFactory bf = bodyFactory != null ? bodyFactory : new BasicBodyFactory();
+            BodyFactory bf = bodyFactory != null ? bodyFactory : new BasicBodyFactory(!strict);
             MimeStreamParser parser = new MimeStreamParser(cfg, mon, bdb);
             parser.setContentHandler(new ParserStreamContentHandler(message, bf));
             parser.setContentDecoding(contentDecoding);

Modified: james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/MessageBuilder.java
URL: http://svn.apache.org/viewvc/james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/MessageBuilder.java?rev=1635743&r1=1635742&r2=1635743&view=diff
==============================================================================
--- james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/MessageBuilder.java (original)
+++ james/mime4j/trunk/dom/src/main/java/org/apache/james/mime4j/message/MessageBuilder.java Fri Oct 31 10:07:00 2014
@@ -309,7 +309,7 @@ public class MessageBuilder extends Abst
     /**
      * Sets binary content of this message with the given MIME type.
      *
-     * @param body
+     * @param bin
      *            the body.
      * @param mimeType
      *            the MIME media type of the specified body
@@ -898,7 +898,7 @@ public class MessageBuilder extends Abst
         BodyDescriptorBuilder currentBodyDescBuilder = bodyDescBuilder != null ? bodyDescBuilder :
                 new DefaultBodyDescriptorBuilder(null, fieldParser != null ? fieldParser :
                         strict ? DefaultFieldParser.getParser() : LenientFieldParser.getParser(), currentMonitor);
-        BodyFactory currentBodyFactory = bodyFactory != null ? bodyFactory : new BasicBodyFactory();
+        BodyFactory currentBodyFactory = bodyFactory != null ? bodyFactory : new BasicBodyFactory(!strict);
         MimeStreamParser parser = new MimeStreamParser(currentConfig, currentMonitor, currentBodyDescBuilder);
 
         Message message = new MessageImpl();

Added: james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/dom/MessageCharsetLenientTest.java
URL: http://svn.apache.org/viewvc/james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/dom/MessageCharsetLenientTest.java?rev=1635743&view=auto
==============================================================================
--- james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/dom/MessageCharsetLenientTest.java (added)
+++ james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/dom/MessageCharsetLenientTest.java Fri Oct 31 10:07:00 2014
@@ -0,0 +1,108 @@
+package org.apache.james.mime4j.dom;
+
+import java.io.ByteArrayInputStream;
+import java.io.UnsupportedEncodingException;
+
+import org.apache.james.mime4j.message.BasicBodyFactory;
+import org.apache.james.mime4j.message.DefaultMessageBuilder;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * check that the Charset handling of BasicBodyFactory can be influenced with
+ * the boolean lenient flag
+ * 
+ * @author wf
+ *
+ */
+public class MessageCharsetLenientTest {
+
+	/**
+	 * set up a message with an invalid charset
+	 * 
+	 * @throws Exception
+	 */
+	@Test
+	public void testLenientCharsetHandling() throws Exception {
+		// this list of invalidCharsets is taken from parsing a sample of some 1/4 million e-mails
+		// so all of them showed up in real world e-mails
+		String invalidCharsets[] = {
+				"%CHARSET",
+				"'iso-8859-1'",
+				"'utf-8'",
+				"0",
+				"238",
+				"DEFAULT_CHARSET",
+				"DIN_66003",
+				"ISO 8859-1",
+				"None",
+				"Standard",
+				"UTF-7",
+				"X-CTEXT",
+				"X-UNKNOWN",
+				"\\iso-8859-1\"",
+				"\\us-ascii\"",
+				"ansi_x3.110-1983",
+				"charset=us-ascii",
+				"en",
+				"iso-0-250-250-250-25-0-25",
+				"iso-10646",
+				"iso-1149-1",
+				"iso-2191-1",
+				"iso-3817-4",
+				"iso-4736-8",
+				"iso-5266-7",
+				"iso-5666-3",
+				"iso-5978-6",
+				"iso-6558-5",
+				"iso-7708-8",
+				"iso-8085-5",
+				"iso-8589-0",
+				"iso-8814-4",
+				"iso-8859-1 name=FAQ.htm",
+				"iso-8859-16",
+				"iso-8859-1?",
+				"iso-8859-8-i",
+				"iso-9284-4",
+				"latin-iso8859-1",
+				"unicode-1-1-utf-7",
+				"unknown-8bit",
+				"utf-7",
+				"windows-1250 reply-type=original",
+				"windows-1252 <!DOCTYPE HTML PUBLIC -//W3C//DTD HTML 4.01 Transitional//EN>",
+				"x-user-defined", " {$RND_CHARSET$}" };
+		
+		// check with lenient charset handling on and off
+		boolean[] lenientstates = { true, false };
+		// create the message builder
+		DefaultMessageBuilder builder = new DefaultMessageBuilder();
+		// count how many Exception hits we got
+		int invalidCount=0;
+		// test in both states
+		for (boolean lenient : lenientstates) {
+			// set how lenient we are
+            builder.setBodyFactory(new BasicBodyFactory(lenient));
+			// check the list of invalid Charsets
+			for (String invalidCharset : invalidCharsets) {
+				// create a message with the charset 
+				String charsetContent = "Subject: my subject\r\n"
+						+ "Content-Type: text/plain; charset=" + invalidCharset + "\r\n"
+						+ "Strange charset isn't it?\r" + "\r\n";
+        // try parsing it
+				try {
+					Message message = builder.parseMessage(new ByteArrayInputStream(
+							charsetContent.getBytes("UTF-8")));
+					// check some message attribute
+					Assert.assertEquals("text/plain", message.getMimeType());
+					// if we get here we had a lenient mode - in non lenient an exception would have been thrown
+					Assert.assertTrue("Charset:"+invalidCharset+" should not be allowed when lenient is "+lenient,lenient);
+				} catch (UnsupportedEncodingException ex) {
+					Assert.assertFalse("Charset:"+invalidCharset+" should not throw an exception when lenient is "+lenient,lenient);
+					invalidCount++;
+				}
+			}
+		} // for
+		Assert.assertEquals(invalidCharsets.length,invalidCount);
+	}
+
+}

Propchange: james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/dom/MessageCharsetLenientTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/dom/MessageCharsetLenientTest.java
------------------------------------------------------------------------------
    svn:keywords = Date Revision

Propchange: james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/dom/MessageCharsetLenientTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/BodyPartBuilderTest.java
URL: http://svn.apache.org/viewvc/james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/BodyPartBuilderTest.java?rev=1635743&r1=1635742&r2=1635743&view=diff
==============================================================================
--- james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/BodyPartBuilderTest.java (original)
+++ james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/BodyPartBuilderTest.java Fri Oct 31 10:07:00 2014
@@ -20,13 +20,10 @@
 package org.apache.james.mime4j.message;
 
 import java.io.InputStream;
-import java.util.List;
 
 import org.apache.james.mime4j.Charsets;
 import org.apache.james.mime4j.dom.Body;
-import org.apache.james.mime4j.dom.TextBody;
 import org.apache.james.mime4j.dom.field.ContentTypeField;
-import org.apache.james.mime4j.stream.Field;
 import org.junit.Assert;
 import org.junit.Test;
 import org.mockito.Mockito;

Modified: james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/EntityImplTest.java
URL: http://svn.apache.org/viewvc/james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/EntityImplTest.java?rev=1635743&r1=1635742&r2=1635743&view=diff
==============================================================================
--- james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/EntityImplTest.java (original)
+++ james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/EntityImplTest.java Fri Oct 31 10:07:00 2014
@@ -23,9 +23,6 @@ import org.apache.james.mime4j.dom.Body;
 import org.apache.james.mime4j.dom.Entity;
 import org.apache.james.mime4j.dom.Header;
 import org.apache.james.mime4j.field.DefaultFieldParser;
-import org.apache.james.mime4j.message.BasicBodyFactory;
-import org.apache.james.mime4j.message.BodyPart;
-import org.apache.james.mime4j.message.HeaderImpl;
 import org.junit.Assert;
 import org.junit.Test;
 

Modified: james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/MessageBuilderTest.java
URL: http://svn.apache.org/viewvc/james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/MessageBuilderTest.java?rev=1635743&r1=1635742&r2=1635743&view=diff
==============================================================================
--- james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/MessageBuilderTest.java (original)
+++ james/mime4j/trunk/dom/src/test/java/org/apache/james/mime4j/message/MessageBuilderTest.java Fri Oct 31 10:07:00 2014
@@ -23,7 +23,6 @@ import java.io.InputStream;
 import java.text.SimpleDateFormat;
 import java.util.Arrays;
 import java.util.Date;
-import java.util.List;
 import java.util.TimeZone;
 
 import org.apache.james.mime4j.Charsets;
@@ -39,7 +38,6 @@ import org.apache.james.mime4j.dom.field
 import org.apache.james.mime4j.field.DefaultFieldParser;
 import org.apache.james.mime4j.field.Fields;
 import org.apache.james.mime4j.field.address.AddressBuilder;
-import org.apache.james.mime4j.stream.Field;
 import org.junit.Assert;
 import org.junit.Test;
 import org.mockito.Mockito;