You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mime4j-dev@james.apache.org by ie...@apache.org on 2014/09/01 10:52:30 UTC

svn commit: r1621705 - in /james/mime4j/trunk/mbox/src/main/java/org/apache/james/mime4j/mboxiterator: FromLinePatterns.java MboxIterator.java

Author: ieugen
Date: Mon Sep  1 08:52:29 2014
New Revision: 1621705

URL: http://svn.apache.org/r1621705
Log:
MIME4J-242 Added more generic From line matcher

* #fixes MIME4J-242

Modified:
    james/mime4j/trunk/mbox/src/main/java/org/apache/james/mime4j/mboxiterator/FromLinePatterns.java
    james/mime4j/trunk/mbox/src/main/java/org/apache/james/mime4j/mboxiterator/MboxIterator.java

Modified: james/mime4j/trunk/mbox/src/main/java/org/apache/james/mime4j/mboxiterator/FromLinePatterns.java
URL: http://svn.apache.org/viewvc/james/mime4j/trunk/mbox/src/main/java/org/apache/james/mime4j/mboxiterator/FromLinePatterns.java?rev=1621705&r1=1621704&r2=1621705&view=diff
==============================================================================
--- james/mime4j/trunk/mbox/src/main/java/org/apache/james/mime4j/mboxiterator/FromLinePatterns.java (original)
+++ james/mime4j/trunk/mbox/src/main/java/org/apache/james/mime4j/mboxiterator/FromLinePatterns.java Mon Sep  1 08:52:29 2014
@@ -29,10 +29,12 @@ public interface FromLinePatterns {
      * Match a line like: From ieugen@apache.org Fri Sep 09 14:04:52 2011
      */
     static final String DEFAULT = "^From \\S+@\\S.*\\d{4}$";
-
     /**
-     * Other type of From_ line: From MAILER-DAEMON Wed Oct 05 21:54:09 2011
+     * Matches other type of From_ line (without @):
+     * From MAILER-DAEMON Wed Oct 05 21:54:09 2011
+     * Thunderbird mbox content: From - Wed Apr 02 06:51:08 2014
      */
+    static final String DEFAULT2 = "^From \\S+.*\\d{4}$";
 
 
 }

Modified: james/mime4j/trunk/mbox/src/main/java/org/apache/james/mime4j/mboxiterator/MboxIterator.java
URL: http://svn.apache.org/viewvc/james/mime4j/trunk/mbox/src/main/java/org/apache/james/mime4j/mboxiterator/MboxIterator.java?rev=1621705&r1=1621704&r2=1621705&view=diff
==============================================================================
--- james/mime4j/trunk/mbox/src/main/java/org/apache/james/mime4j/mboxiterator/MboxIterator.java (original)
+++ james/mime4j/trunk/mbox/src/main/java/org/apache/james/mime4j/mboxiterator/MboxIterator.java Mon Sep  1 08:52:29 2014
@@ -18,7 +18,12 @@
  ****************************************************************/
 package org.apache.james.mime4j.mboxiterator;
 
-import java.io.*;
+import java.io.CharConversionException;
+import java.io.Closeable;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
 import java.nio.Buffer;
 import java.nio.CharBuffer;
 import java.nio.MappedByteBuffer;
@@ -46,7 +51,7 @@ public class MboxIterator implements Ite
 
     private final FileInputStream theFile;
     private final CharBuffer mboxCharBuffer;
-    private Matcher fromLineMathcer;
+    private Matcher fromLineMatcher;
     private boolean fromLineFound;
     private final MappedByteBuffer byteBuffer;
     private final CharsetDecoder DECODER;
@@ -58,6 +63,7 @@ public class MboxIterator implements Ite
     private final Pattern MESSAGE_START;
     private int findStart = -1;
     private int findEnd = -1;
+    private final File mbox;
 
     private MboxIterator(final File mbox,
                          final Charset charset,
@@ -70,19 +76,30 @@ public class MboxIterator implements Ite
         this.MESSAGE_START = Pattern.compile(regexpPattern, regexpFlags);
         this.DECODER = charset.newDecoder();
         this.mboxCharBuffer = CharBuffer.allocate(MAX_MESSAGE_SIZE);
+        this.mbox = mbox;
         this.theFile = new FileInputStream(mbox);
         this.byteBuffer = theFile.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, theFile.getChannel().size());
         initMboxIterator();
     }
 
-    private void initMboxIterator() throws IOException, CharConversionException {
+    /**
+     * initialize the Mailbox iterator
+     *
+     * @throws IOException
+     * @throws CharConversionException
+     */
+    protected void initMboxIterator() throws IOException {
         decodeNextCharBuffer();
-        fromLineMathcer = MESSAGE_START.matcher(mboxCharBuffer);
-        fromLineFound = fromLineMathcer.find();
+        fromLineMatcher = MESSAGE_START.matcher(mboxCharBuffer);
+        fromLineFound = fromLineMatcher.find();
         if (fromLineFound) {
-            saveFindPositions(fromLineMathcer);
-        } else if (fromLineMathcer.hitEnd()) {
-            throw new IllegalArgumentException("File does not contain From_ lines! Maybe not be a vaild Mbox.");
+            saveFindPositions(fromLineMatcher);
+        } else if (fromLineMatcher.hitEnd()) {
+            String path = "";
+            if (mbox != null)
+                path = mbox.getPath();
+            throw new IllegalArgumentException("File " + path + " does not contain From_ lines that match the pattern '"
+                    + MESSAGE_START.pattern() + "'! Maybe not be a valid Mbox or wrong matcher.");
         }
     }
 
@@ -139,12 +156,12 @@ public class MboxIterator implements Ite
          */
         public CharBufferWrapper next() {
             final CharBuffer message;
-            fromLineFound = fromLineMathcer.find();
+            fromLineFound = fromLineMatcher.find();
             if (fromLineFound) {
                 message = mboxCharBuffer.slice();
                 message.position(findEnd + 1);
-                saveFindPositions(fromLineMathcer);
-                message.limit(fromLineMathcer.start());
+                saveFindPositions(fromLineMatcher);
+                message.limit(fromLineMatcher.start());
             } else {
                 /* We didn't find other From_ lines this means either:
                  *  - we reached end of mbox and no more messages
@@ -163,17 +180,17 @@ public class MboxIterator implements Ite
                     } catch (CharConversionException ex) {
                         throw new RuntimeException(ex);
                     }
-                    fromLineMathcer = MESSAGE_START.matcher(mboxCharBuffer);
-                    fromLineFound = fromLineMathcer.find();
+                    fromLineMatcher = MESSAGE_START.matcher(mboxCharBuffer);
+                    fromLineFound = fromLineMatcher.find();
                     if (fromLineFound) {
-                        saveFindPositions(fromLineMathcer);
+                        saveFindPositions(fromLineMatcher);
                     }
                     message = mboxCharBuffer.slice();
-                    message.position(fromLineMathcer.end() + 1);
-                    fromLineFound = fromLineMathcer.find();
+                    message.position(fromLineMatcher.end() + 1);
+                    fromLineFound = fromLineMatcher.find();
                     if (fromLineFound) {
-                        saveFindPositions(fromLineMathcer);
-                        message.limit(fromLineMathcer.start());
+                        saveFindPositions(fromLineMatcher);
+                        message.limit(fromLineMatcher.start());
                     }
                 } else {
                     message = mboxCharBuffer.slice();