You are viewing a plain text version of this content. The canonical link for it is here.
Posted to server-dev@james.apache.org by no...@apache.org on 2011/06/07 21:04:39 UTC

svn commit: r1133130 - in /james/mailbox/trunk: api/src/main/java/org/apache/james/mailbox/ store/src/main/java/org/apache/james/mailbox/store/search/ store/src/main/java/org/apache/james/mailbox/store/search/comparator/ store/src/main/java/org/apache/...

Author: norman
Date: Tue Jun  7 19:04:38 2011
New Revision: 1133130

URL: http://svn.apache.org/viewvc?rev=1133130&view=rev
Log:
Add more Base-Subject extracting tests and fix a few bugs in there. See MAILBOX-78

Modified:
    james/mailbox/trunk/api/src/main/java/org/apache/james/mailbox/SearchQuery.java
    james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java
    james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/comparator/CombinedComparator.java
    james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/lucene/LuceneMessageSearchIndex.java
    james/mailbox/trunk/store/src/test/java/org/apache/james/mailbox/store/search/SearchUtilTest.java

Modified: james/mailbox/trunk/api/src/main/java/org/apache/james/mailbox/SearchQuery.java
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/api/src/main/java/org/apache/james/mailbox/SearchQuery.java?rev=1133130&r1=1133129&r2=1133130&view=diff
==============================================================================
--- james/mailbox/trunk/api/src/main/java/org/apache/james/mailbox/SearchQuery.java (original)
+++ james/mailbox/trunk/api/src/main/java/org/apache/james/mailbox/SearchQuery.java Tue Jun  7 19:04:38 2011
@@ -102,6 +102,11 @@ public class SearchQuery {
             To,
             
             /**
+             * Sent date of the message
+             */
+            SentDate,
+            
+            /**
              * Uid of the message. This is the DEFAULT if no other is specified
              */
             Uid

Modified: james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java?rev=1133130&r1=1133129&r2=1133130&view=diff
==============================================================================
--- james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java (original)
+++ james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/SearchUtil.java Tue Jun  7 19:04:38 2011
@@ -19,6 +19,7 @@
 package org.apache.james.mailbox.store.search;
 
 import java.nio.charset.Charset;
+import java.util.Locale;
 
 import org.apache.james.mime4j.codec.DecoderUtil;
 import org.apache.james.mime4j.util.MimeUtil;
@@ -29,7 +30,7 @@ public class SearchUtil {
     private final static String SUBJ_FWD_HDR = "[fwd:";
     private final static String SUBJ_FWD_TRL = "]";
     private final static String RE = "re";
-    private final static String FWD = "FWD";
+    private final static String FWD = "fwd";
     private final static String FW = "fw";
     private final static char WS = ' ';
     private final static char OPEN_SQUARE_BRACKED = '[';
@@ -133,7 +134,7 @@ public class SearchUtil {
                     //    base, then remove the prefix text.
                     decodedSubjectLength = decodedSubject.length();
                     String subj = removeBlob(decodedSubject);
-                    
+
                     // check if it will leave a non-empty subject
                     if (subj.length() > 0) {
                         decodedSubject = subj;
@@ -150,14 +151,14 @@ public class SearchUtil {
                         break;
                     } 
                 }
+                String lowcaseSubject = decodedSubject.toLowerCase(Locale.US);
                 
-                if (decodedSubject.startsWith(SUBJ_FWD_HDR) && decodedSubject.endsWith(SUBJ_FWD_TRL)) {
+                if (lowcaseSubject.startsWith(SUBJ_FWD_HDR) && lowcaseSubject.endsWith(SUBJ_FWD_TRL)) {
                     //    (6) If the resulting text begins with the subj-fwd-hdr ABNF and
                     //    ends with the subj-fwd-trl ABNF, remove the subj-fwd-hdr and
                     //    subj-fwd-trl and repeat from step (2).
                     decodedSubject = decodedSubject.substring(SUBJ_FWD_HDR.length(), decodedSubject.length() - SUBJ_FWD_TRL.length());
                     decodedSubjectLength = decodedSubject.length();
-
                 } else {
                     break;
                 }
@@ -167,13 +168,26 @@ public class SearchUtil {
             return decodedSubject;
     }
  
+    /**
+     * Remove the subj-blob
+     * 
+     *     subj-blob = "[" *BLOBCHAR "]" *WSP
+     *     subj-refwd = ("re" / ("fw" ["d"])) *WSP [subj-blob] ":"
+     * 
+     *     BLOBCHAR = %x01-5a / %x5c / %x5e-7f
+     *     ; any CHAR except '[' and ']' 
+     *     
+     *     
+     * @param subject
+     * @return sub
+     */
     private static String removeSubjectBlob(String subject) {
         String subj = subject;
         while(subj.charAt(0) == OPEN_SQUARE_BRACKED) {
             int length = subj.length();
             subj = removeBlob(subject);
             int i = 0;
-            if (subj.charAt(i) == CLOSE_SQUARE_BRACKED) {
+            if (subj.length() > 0 && subj.charAt(i) == CLOSE_SQUARE_BRACKED) {
                 i++;
             } else {
                 return subject;
@@ -182,62 +196,86 @@ public class SearchUtil {
                 i++;
             }
             subj = subj.substring(i);
-            System.out.println(subj);
-
             if (length == subj.length()) {
                 return subj;
             }
         }
         return subj;
     }
+
+    /**
+     * Remove the subj-leader
+     * 
+     *     subj-leader = (*subj-blob subj-refwd) / WSP
+     *     subj-blob = "[" *BLOBCHAR "]" *WSP
+     *     subj-refwd = ("re" / ("fw" ["d"])) *WSP [subj-blob] ":"
+     * 
+     *     BLOBCHAR = %x01-5a / %x5c / %x5e-7f
+     *     ; any CHAR except '[' and ']' 
+     *     
+     *     
+     * @param subject
+     * @return sub
+     */
     private static String removeSubjLeaders(String subject) {
-        
-        // subj-leader     = (*subj-blob subj-refwd) / WSP
-        // subj-blob       = "[" *BLOBCHAR "]" *WSP
-        // subj-refwd      = ("re" / ("fw" ["d"])) *WSP [subj-blob] ":"
-        //
-        // BLOBCHAR        = %x01-5a / %x5c / %x5e-7f
-        //                ; any CHAR except '[' and ']' */
-        
-        String subj = removeSubjectBlob(subject);
-        
-        
         int subString = 0;
-        if (subj.startsWith(RE)) {
-            subString = RE.length();
-        } else if (subj.startsWith(FWD)) {
-            subString = FWD.length();
-        } else if (subj.startsWith(FW)) {
-            subString  = FW.length();
-        } else {
-            return subject;
-        } 
-        while(subj.charAt(subString) == WS) {
+        while (subject.charAt(subString) == WS) {
             subString++;
         }
-        subj = removeSubjectBlob(subj.substring(subString));
-        if (subj.endsWith(String.valueOf(CLOSE_SQUARE_BRACKED))) {
-            subString = 1;
+        if (subString > 0) {
+            // check if we have matched WSP
+            return subject.substring(subString);
         } else {
-            subString = 0;
-        }
 
-        if (subj.charAt(subString) == COLON) {
-            subString++;
-        } else {
-            return subject;
+            String subj = removeSubjectBlob(subject);
+
+            String lowCaseSubj = subj.toLowerCase(Locale.US);
+            if (lowCaseSubj.startsWith(RE)) {
+                subString = RE.length();
+            } else if (lowCaseSubj.startsWith(FWD)) {
+                subString = FWD.length();
+            } else if (lowCaseSubj.startsWith(FW)) {
+                subString = FW.length();
+            } else {
+                return subject;
+            }
+            while (subj.charAt(subString) == WS) {
+                subString++;
+            }
+
+            /*
+             * subj = removeSubjectBlob(subj.substring(subString)); if
+             * (subj.endsWith(String.valueOf(CLOSE_SQUARE_BRACKED))) { subString
+             * = 1; } else { subString = 0; }
+             */
+
+            if (subj.charAt(subString) == COLON) {
+                subString++;
+            } else {
+                return subject;
+            }
+
+            while (subj.charAt(subString) == WS) {
+                subString++;
+            }
+            return subj.substring(subString);
         }
-        
-        return subj.substring(subString);
     }
+
     
+    /**
+     * Remove the subj-trailer
+     * 
+     *    subj-trailer    = "(fwd)" / WSP
+     *  
+     *  
+     * @param decodedSubject
+     * @return sub
+     */
     private static String removeSubTrailers(String decodedSubject) {
         int subStringStart = 0;
         int subStringEnd = decodedSubject.length();
-        
-        // remove the remove_subj_trailers
-        //
-        // subj-trailer    = "(fwd)" / WSP
+
         int originalSize = decodedSubject.length();
         int curPos = originalSize -1;
         while(true) {
@@ -255,6 +293,15 @@ public class SearchUtil {
         return decodedSubject;
     }
     
+    /**
+     * Remove all blobchars
+     * 
+     *     BLOBCHAR = %x01-5a / %x5c / %x5e-7f
+     *     ; any CHAR except '[' and ']' 
+     *     
+     * @param subject
+     * @return subj
+     */
     private static String removeBlob(String subject) {
         int i = 0;
         char lastChar = Character.UNASSIGNED;
@@ -270,6 +317,8 @@ public class SearchUtil {
         if (lastChar != CLOSE_SQUARE_BRACKED) {
             return subject;
         } else {
+            // the lastChar was a ] so increase the count before substring
+            i++;
             return subject.substring(i);
         }
 

Modified: james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/comparator/CombinedComparator.java
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/comparator/CombinedComparator.java?rev=1133130&r1=1133129&r2=1133130&view=diff
==============================================================================
--- james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/comparator/CombinedComparator.java (original)
+++ james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/comparator/CombinedComparator.java Tue Jun  7 19:04:38 2011
@@ -81,6 +81,8 @@ public class CombinedComparator implemen
             case Uid:
                 comparator = UidComparator.uid(reverse);
                 break;
+            case SentDate: 
+                comparator = SentDateComparator.sentDate(reverse);
             default:
                 break;
             }

Modified: james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/lucene/LuceneMessageSearchIndex.java
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/lucene/LuceneMessageSearchIndex.java?rev=1133130&r1=1133129&r2=1133130&view=diff
==============================================================================
--- james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/lucene/LuceneMessageSearchIndex.java (original)
+++ james/mailbox/trunk/store/src/main/java/org/apache/james/mailbox/store/search/lucene/LuceneMessageSearchIndex.java Tue Jun  7 19:04:38 2011
@@ -56,9 +56,11 @@ import org.apache.james.mailbox.SearchQu
 import org.apache.james.mailbox.store.mail.model.Mailbox;
 import org.apache.james.mailbox.store.mail.model.Message;
 import org.apache.james.mailbox.store.search.MessageSearchIndex;
+import org.apache.james.mailbox.store.search.SearchUtil;
 import org.apache.james.mime4j.MimeException;
 import org.apache.james.mime4j.descriptor.BodyDescriptor;
 import org.apache.james.mime4j.field.AddressListField;
+import org.apache.james.mime4j.field.DateTimeField;
 import org.apache.james.mime4j.field.address.Address;
 import org.apache.james.mime4j.field.address.AddressList;
 import org.apache.james.mime4j.field.address.Group;
@@ -178,6 +180,8 @@ public class LuceneMessageSearchIndex<Id
     
     public final static String FIRST_FROM_MAILBOX_NAME_FIELD ="firstFromMailboxName";
 
+    public final static String BASE_SUBJECT_FIELD = "baseSubject";
+    
     /**
      * {@link Field} which contain the internalDate of the message with YEAR-Resolution
      */
@@ -252,6 +256,12 @@ public class LuceneMessageSearchIndex<Id
     private final static SortField ARRIVAL_MAILBOX_SORT = new SortField(INTERNAL_DATE_FIELD_MILLISECOND_RESOLUTION, SortField.LONG);
     private final static SortField ARRIVAL_MAILBOX_SORT_REVERSE = new SortField(INTERNAL_DATE_FIELD_MILLISECOND_RESOLUTION, SortField.LONG, true);
 
+    private final static SortField BASE_SUBJECT_SORT = new SortField(BASE_SUBJECT_FIELD, SortField.STRING);
+    private final static SortField BASE_SUBJECT_SORT_REVERSE = new SortField(BASE_SUBJECT_FIELD, SortField.STRING, true);
+    
+    private final static SortField SENT_DATE_SORT = new SortField(SENT_DATE_FIELD_MILLISECOND_RESOLUTION, SortField.LONG);
+    private final static SortField SENT_DATE_SORT_REVERSE = new SortField(SENT_DATE_FIELD_MILLISECOND_RESOLUTION, SortField.LONG, true);
+    
     public LuceneMessageSearchIndex(Directory directory) throws CorruptIndexException, LockObtainFailedException, IOException {
         this(directory, true);
     }
@@ -353,7 +363,7 @@ public class LuceneMessageSearchIndex<Id
      * @param membership
      * @return document
      */
-    private Document createMessageDocument(Message<?> membership) throws MailboxException{
+    private Document createMessageDocument(final Message<?> membership) throws MailboxException{
         final Document doc = new Document();
         // TODO: Better handling
         doc.add(new Field(MAILBOX_ID_FIELD, membership.getMailboxId().toString().toLowerCase(Locale.US), Store.YES, Index.NOT_ANALYZED));
@@ -378,13 +388,16 @@ public class LuceneMessageSearchIndex<Id
 
             public void headers(Header header) {
                 
+                Date sentDate = null;
+                
                 Iterator<org.apache.james.mime4j.parser.Field> fields = header.iterator();
                 while(fields.hasNext()) {
                     org.apache.james.mime4j.parser.Field f = fields.next();
                     String headerName = f.getName().toLowerCase(Locale.US);
+                    String headerValue = f.getBody().toLowerCase(Locale.US);
                     String fullValue =  f.toString().toLowerCase(Locale.US);
                     doc.add(new Field(HEADERS_FIELD, fullValue, Store.NO, Index.ANALYZED));
-                    doc.add(new Field(PREFIX_HEADER_FIELD + headerName, f.getBody().toLowerCase(Locale.US) ,Store.NO, Index.ANALYZED));
+                    doc.add(new Field(PREFIX_HEADER_FIELD + headerName, headerValue, Store.NO, Index.ANALYZED));
                     
                     // TODO: Handle base subject
                     if (f instanceof AddressListField) {
@@ -437,8 +450,18 @@ public class LuceneMessageSearchIndex<Id
                                 }
                             }
                         }
+                    } else if (headerName.equalsIgnoreCase("Subject")) {
+                        doc.add(new Field(BASE_SUBJECT_FIELD, SearchUtil.getBaseSubject(headerValue), Store.YES, Index.NOT_ANALYZED));
+                    } else if (f instanceof DateTimeField) {
+                        sentDate = ((DateTimeField) f).getDate();
                     }
                 }
+                if (sentDate == null) {
+                    sentDate = membership.getInternalDate();
+                }
+                doc.add(new NumericField(SENT_DATE_FIELD_MILLISECOND_RESOLUTION,Store.NO, true).setLongValue(DateUtils.truncate(sentDate,Calendar.MILLISECOND).getTime()));
+
+
            
             }
             /*
@@ -727,6 +750,13 @@ public class LuceneMessageSearchIndex<Id
                     sf = ARRIVAL_MAILBOX_SORT;
                 }
                 break;
+            case SentDate:
+                if (reverse) {
+                    sf = SENT_DATE_SORT_REVERSE;
+                } else {
+                    sf = SENT_DATE_SORT;
+                }
+                break;
             case Cc:
                 if (reverse) {
                     sf = FIRST_CC_MAILBOX_SORT_REVERSE;
@@ -749,7 +779,11 @@ public class LuceneMessageSearchIndex<Id
                 }
                 break;
             case Subject:
-                // TODO: Fix me
+                if (reverse) {
+                    sf = BASE_SUBJECT_SORT_REVERSE;
+                } else {
+                    sf = BASE_SUBJECT_SORT;
+                }
                 break;
             case To:
                 if (reverse) {
@@ -770,7 +804,15 @@ public class LuceneMessageSearchIndex<Id
                 break;
             }
             if (sf != null) {
+
                 fields.add(sf);
+                
+                // Add the uid sort as tie-breaker
+                if (sf == SENT_DATE_SORT) {
+                    fields.add(UID_SORT);
+                } else if (sf == SENT_DATE_SORT_REVERSE) {
+                    fields.add(UID_SORT_REVERSE);
+                }
             }
         }
         sort.setSort(fields.toArray(new SortField[0]));

Modified: james/mailbox/trunk/store/src/test/java/org/apache/james/mailbox/store/search/SearchUtilTest.java
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/store/src/test/java/org/apache/james/mailbox/store/search/SearchUtilTest.java?rev=1133130&r1=1133129&r2=1133130&view=diff
==============================================================================
--- james/mailbox/trunk/store/src/test/java/org/apache/james/mailbox/store/search/SearchUtilTest.java (original)
+++ james/mailbox/trunk/store/src/test/java/org/apache/james/mailbox/store/search/SearchUtilTest.java Tue Jun  7 19:04:38 2011
@@ -1,3 +1,21 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
 package org.apache.james.mailbox.store.search;
 
 import static org.junit.Assert.*;
@@ -31,11 +49,20 @@ public class SearchUtilTest {
         assertEquals("This is my subject", SearchUtil.getBaseSubject(subject));
     }
     
-    /*
+
     @Test
-    public void testRemoveLeaders() {
-        String subject ="[Blah blub] [go] re: This is my subject";
-        assertEquals("This is my subject", SearchUtil.getBaseSubject(subject));
+    public void testSimpleExtraction() {
+        String expectedSubject = "Test";
+        assertEquals(expectedSubject, SearchUtil.getBaseSubject("Re: Test"));
+        assertEquals(expectedSubject, SearchUtil.getBaseSubject("re: Test"));
+        assertEquals(expectedSubject, SearchUtil.getBaseSubject("Fwd: Test"));
+        assertEquals(expectedSubject, SearchUtil.getBaseSubject("fwd: Test"));
+        assertEquals(expectedSubject, SearchUtil.getBaseSubject("Fwd: Re: Test"));
+        assertEquals(expectedSubject, SearchUtil.getBaseSubject("Fwd: Re: Test (fwd)"));
+    }
+  
+    @Test
+    public void testComplexExtraction() {
+        assertEquals("Test", SearchUtil.getBaseSubject("Re: re:re: fwd:[fwd: \t  Test]  (fwd)  (fwd)(fwd) "));
     }
-    */
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscribe@james.apache.org
For additional commands, e-mail: server-dev-help@james.apache.org