You are viewing a plain text version of this content. The canonical link for it is here.
Posted to server-dev@james.apache.org by ad...@apache.org on 2017/09/08 20:14:51 UTC

[3/7] james-project git commit: JAMES-2137 Provide a method for searching in attachments and associated capability

JAMES-2137 Provide a method for searching in attachments and associated capability


Project: http://git-wip-us.apache.org/repos/asf/james-project/repo
Commit: http://git-wip-us.apache.org/repos/asf/james-project/commit/d76f8fc4
Tree: http://git-wip-us.apache.org/repos/asf/james-project/tree/d76f8fc4
Diff: http://git-wip-us.apache.org/repos/asf/james-project/diff/d76f8fc4

Branch: refs/heads/master
Commit: d76f8fc402ec64d47b2520f33e01f08eb028f99e
Parents: adad8aa
Author: Antoine Duprat <ad...@linagora.com>
Authored: Tue Sep 5 14:37:48 2017 +0200
Committer: Antoine Duprat <ad...@linagora.com>
Committed: Fri Sep 8 21:53:13 2017 +0200

----------------------------------------------------------------------
 .../apache/james/mailbox/MailboxManager.java    |   3 +-
 .../apache/james/mailbox/model/SearchQuery.java |  17 +-
 ...lasticSearchListeningMessageSearchIndex.java |   4 +-
 .../elasticsearch/query/CriterionConverter.java |   4 +
 .../ElasticSearchIntegrationTest.java           |  22 +-
 mailbox/scanning-search/pom.xml                 |   6 +
 .../mailbox/store/search/PDFTextExtractor.java  |  63 +++
 .../search/SimpleMessageSearchIndexTest.java    |   2 +-
 .../mailbox/store/StoreMailboxManager.java      |   4 +-
 .../mailbox/store/search/MessageSearches.java   |  37 +-
 .../store/search/SimpleMessageSearchIndex.java  |  10 +-
 .../store/SearchUtilsMultipartMixedTest.java    |  73 +--
 .../mailbox/store/SearchUtilsRFC822Test.java    |  37 +-
 .../james/mailbox/store/SearchUtilsTest.java    | 447 ++++++++++---------
 .../search/AbstractMessageSearchIndexTest.java  |  63 ++-
 .../store/src/test/resources/eml/attachment.pdf | Bin 0 -> 9404 bytes
 .../resources/eml/emailWithTextAttachment.eml   |  26 ++
 17 files changed, 531 insertions(+), 287 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/james-project/blob/d76f8fc4/mailbox/api/src/main/java/org/apache/james/mailbox/MailboxManager.java
----------------------------------------------------------------------
diff --git a/mailbox/api/src/main/java/org/apache/james/mailbox/MailboxManager.java b/mailbox/api/src/main/java/org/apache/james/mailbox/MailboxManager.java
index 4e8d2a7..f0b3eb1 100644
--- a/mailbox/api/src/main/java/org/apache/james/mailbox/MailboxManager.java
+++ b/mailbox/api/src/main/java/org/apache/james/mailbox/MailboxManager.java
@@ -102,7 +102,8 @@ public interface MailboxManager extends RequestAware, MailboxListenerSupport {
          *  provide an index on the fields: 
          *  From, To, Cc, Bcc, Subjects, textBody & htmlBody
          */
-        Text
+        Text,
+        Attachment
     }
     
     EnumSet<SearchCapabilities> getSupportedSearchCapabilities();

http://git-wip-us.apache.org/repos/asf/james-project/blob/d76f8fc4/mailbox/api/src/main/java/org/apache/james/mailbox/model/SearchQuery.java
----------------------------------------------------------------------
diff --git a/mailbox/api/src/main/java/org/apache/james/mailbox/model/SearchQuery.java b/mailbox/api/src/main/java/org/apache/james/mailbox/model/SearchQuery.java
index b6fa953..d3c25af 100644
--- a/mailbox/api/src/main/java/org/apache/james/mailbox/model/SearchQuery.java
+++ b/mailbox/api/src/main/java/org/apache/james/mailbox/model/SearchQuery.java
@@ -543,6 +543,17 @@ public class SearchQuery implements Serializable {
     }
 
     /**
+     * Creates a filter matching messages which have an attachment containing the given text.
+     * 
+     * @param value
+     *            search value
+     * @return <code>Criterion</code>, not null
+     */
+    public static Criterion attachmentContains(String value) {
+        return new TextCriterion(value, Scope.ATTACHMENTS);
+    }
+
+    /**
      * Creates a filter matching messages within any of the given ranges.
      * 
      * @param range
@@ -1092,8 +1103,10 @@ public class SearchQuery implements Serializable {
          */
         TEXT,
 
-        /** Full message content including headers */
-        FULL
+        /** Full message content including headers and attachments */
+        FULL,
+        /** Attachment content */
+        ATTACHMENTS
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/james-project/blob/d76f8fc4/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/events/ElasticSearchListeningMessageSearchIndex.java
----------------------------------------------------------------------
diff --git a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/events/ElasticSearchListeningMessageSearchIndex.java b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/events/ElasticSearchListeningMessageSearchIndex.java
index ec9f1b4..34c0d2b 100644
--- a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/events/ElasticSearchListeningMessageSearchIndex.java
+++ b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/events/ElasticSearchListeningMessageSearchIndex.java
@@ -19,11 +19,13 @@
 package org.apache.james.mailbox.elasticsearch.events;
 
 import static org.elasticsearch.index.query.QueryBuilders.termQuery;
+
 import java.util.EnumSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Optional;
 import java.util.stream.Collectors;
+
 import javax.inject.Inject;
 
 import org.apache.james.backends.es.ElasticSearchIndexer;
@@ -76,7 +78,7 @@ public class ElasticSearchListeningMessageSearchIndex extends ListeningMessageSe
 
     @Override
     public EnumSet<SearchCapabilities> getSupportedCapabilities() {
-        return EnumSet.of(SearchCapabilities.MultimailboxSearch, SearchCapabilities.Text);
+        return EnumSet.of(SearchCapabilities.MultimailboxSearch, SearchCapabilities.Text, SearchCapabilities.Attachment);
     }
     
     @Override

http://git-wip-us.apache.org/repos/asf/james-project/blob/d76f8fc4/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/query/CriterionConverter.java
----------------------------------------------------------------------
diff --git a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/query/CriterionConverter.java b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/query/CriterionConverter.java
index 9e06c87..59d2029 100644
--- a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/query/CriterionConverter.java
+++ b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/query/CriterionConverter.java
@@ -146,6 +146,10 @@ public class CriterionConverter {
                     .should(matchQuery(JsonMessageConstants.HTML_BODY, textCriterion.getOperator().getValue()))
                     .should(matchQuery(JsonMessageConstants.ATTACHMENTS + "." + JsonMessageConstants.Attachment.TEXT_CONTENT,
                         textCriterion.getOperator().getValue()));
+        case ATTACHMENTS:
+            return boolQuery()
+                    .should(matchQuery(JsonMessageConstants.ATTACHMENTS + "." + JsonMessageConstants.Attachment.TEXT_CONTENT,
+                        textCriterion.getOperator().getValue()));
         }
         throw new RuntimeException("Unknown SCOPE for text criterion");
     }

http://git-wip-us.apache.org/repos/asf/james-project/blob/d76f8fc4/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java
----------------------------------------------------------------------
diff --git a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java
index a9344b2..13e1352 100644
--- a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java
+++ b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java
@@ -56,10 +56,14 @@ import org.apache.james.mailbox.store.FakeAuthenticator;
 import org.apache.james.mailbox.store.FakeAuthorizator;
 import org.apache.james.mailbox.store.JVMMailboxPathLocker;
 import org.apache.james.mailbox.store.MailboxSessionMapperFactory;
-import org.apache.james.mailbox.store.extractor.DefaultTextExtractor;
 import org.apache.james.mailbox.store.mail.model.impl.MessageParser;
 import org.apache.james.mailbox.store.search.AbstractMessageSearchIndexTest;
+import org.apache.james.mailbox.tika.TikaConfiguration;
+import org.apache.james.mailbox.tika.TikaContainer;
+import org.apache.james.mailbox.tika.TikaHttpClientImpl;
+import org.apache.james.mailbox.tika.TikaTextExtractor;
 import org.elasticsearch.client.Client;
+import org.junit.ClassRule;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.RuleChain;
@@ -80,6 +84,20 @@ public class ElasticSearchIntegrationTest extends AbstractMessageSearchIndexTest
     @Rule
     public RuleChain ruleChain = RuleChain.outerRule(temporaryFolder).around(embeddedElasticSearch);
 
+    @ClassRule
+    public static TikaContainer tika = new TikaContainer();
+    private TikaTextExtractor textExtractor;
+
+    @Override
+    public void setUp() throws Exception {
+        textExtractor = new TikaTextExtractor(new TikaHttpClientImpl(TikaConfiguration.builder()
+                .host(tika.getIp())
+                .port(tika.getPort())
+                .timeoutInMillis(tika.getTimeoutInMillis())
+                .build()));
+        super.setUp();
+    }
+
     @Override
     protected void await() {
         embeddedElasticSearch.awaitForElasticSearch();
@@ -107,7 +125,7 @@ public class ElasticSearchIntegrationTest extends AbstractMessageSearchIndexTest
                 MailboxElasticsearchConstants.MAILBOX_INDEX,
                 MailboxElasticsearchConstants.MESSAGE_TYPE),
             new ElasticSearchSearcher(client, new QueryConverter(new CriterionConverter()), SEARCH_SIZE, new InMemoryId.Factory(), messageIdFactory),
-            new MessageToElasticSearchJson(new DefaultTextExtractor(), ZoneId.of("Europe/Paris"), IndexAttachments.YES));
+            new MessageToElasticSearchJson(textExtractor, ZoneId.of("Europe/Paris"), IndexAttachments.YES));
         storeMailboxManager = new InMemoryMailboxManager(
             mapperFactory,
             new FakeAuthenticator(),

http://git-wip-us.apache.org/repos/asf/james-project/blob/d76f8fc4/mailbox/scanning-search/pom.xml
----------------------------------------------------------------------
diff --git a/mailbox/scanning-search/pom.xml b/mailbox/scanning-search/pom.xml
index 5094710..1ee91bc 100644
--- a/mailbox/scanning-search/pom.xml
+++ b/mailbox/scanning-search/pom.xml
@@ -62,6 +62,12 @@
             <scope>test</scope>
         </dependency>
         <dependency>
+            <groupId>org.apache.pdfbox</groupId>
+            <artifactId>pdfbox</artifactId>
+            <version>2.0.7</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
             <groupId>org.assertj</groupId>
             <artifactId>assertj-core</artifactId>
             <scope>test</scope>

http://git-wip-us.apache.org/repos/asf/james-project/blob/d76f8fc4/mailbox/scanning-search/src/test/java/org/apache/james/mailbox/store/search/PDFTextExtractor.java
----------------------------------------------------------------------
diff --git a/mailbox/scanning-search/src/test/java/org/apache/james/mailbox/store/search/PDFTextExtractor.java b/mailbox/scanning-search/src/test/java/org/apache/james/mailbox/store/search/PDFTextExtractor.java
new file mode 100644
index 0000000..1a5b5eb
--- /dev/null
+++ b/mailbox/scanning-search/src/test/java/org/apache/james/mailbox/store/search/PDFTextExtractor.java
@@ -0,0 +1,63 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one   *
+ * or more contributor license agreements.  See the NOTICE file *
+ * distributed with this work for additional information        *
+ * regarding copyright ownership.  The ASF licenses this file   *
+ * to you under the Apache License, Version 2.0 (the            *
+ * "License"); you may not use this file except in compliance   *
+ * with the License.  You may obtain a copy of the License at   *
+ *                                                              *
+ *   http://www.apache.org/licenses/LICENSE-2.0                 *
+ *                                                              *
+ * Unless required by applicable law or agreed to in writing,   *
+ * software distributed under the License is distributed on an  *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
+ * KIND, either express or implied.  See the License for the    *
+ * specific language governing permissions and limitations      *
+ * under the License.                                           *
+ ****************************************************************/
+package org.apache.james.mailbox.store.search;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.james.mailbox.extractor.ParsedContent;
+import org.apache.james.mailbox.extractor.TextExtractor;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.text.PDFTextStripper;
+
+import com.google.common.base.Charsets;
+import com.google.common.collect.ImmutableMap;
+
+public class PDFTextExtractor implements TextExtractor {
+
+    private static final String PDF_TYPE = "application/pdf";
+
+    @Override
+    public ParsedContent extractContent(InputStream inputStream, String contentType) throws Exception {
+        if (isPDF(contentType)) {
+            return extractTextFromPDF(inputStream);
+        }
+        try {
+            return new ParsedContent(IOUtils.toString(inputStream, Charsets.UTF_8), ImmutableMap.of());
+        } catch (IOException e) {
+            return new ParsedContent(null, ImmutableMap.of());
+        }
+    }
+
+    private boolean isPDF(String contentType) {
+        return contentType.equals(PDF_TYPE);
+    }
+
+    private ParsedContent extractTextFromPDF(InputStream inputStream) {
+        try {
+            return new ParsedContent(
+                    new PDFTextStripper().getText(
+                            PDDocument.load(inputStream)),
+                    ImmutableMap.of());
+        } catch (IOException e) {
+            return new ParsedContent(null, ImmutableMap.of());
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/james-project/blob/d76f8fc4/mailbox/scanning-search/src/test/java/org/apache/james/mailbox/store/search/SimpleMessageSearchIndexTest.java
----------------------------------------------------------------------
diff --git a/mailbox/scanning-search/src/test/java/org/apache/james/mailbox/store/search/SimpleMessageSearchIndexTest.java b/mailbox/scanning-search/src/test/java/org/apache/james/mailbox/store/search/SimpleMessageSearchIndexTest.java
index 533ac00..9d5f2ba 100644
--- a/mailbox/scanning-search/src/test/java/org/apache/james/mailbox/store/search/SimpleMessageSearchIndexTest.java
+++ b/mailbox/scanning-search/src/test/java/org/apache/james/mailbox/store/search/SimpleMessageSearchIndexTest.java
@@ -42,7 +42,7 @@ public class SimpleMessageSearchIndexTest extends AbstractMessageSearchIndexTest
     @Override
     protected void initializeMailboxManager() throws Exception {
         MailboxSessionMapperFactory mapperFactory = new InMemoryMailboxSessionMapperFactory();
-        messageSearchIndex = new SimpleMessageSearchIndex(mapperFactory, mapperFactory);
+        messageSearchIndex = new SimpleMessageSearchIndex(mapperFactory, mapperFactory, new PDFTextExtractor());
         storeMailboxManager = new InMemoryMailboxManager(
             mapperFactory,
             new FakeAuthenticator(),

http://git-wip-us.apache.org/repos/asf/james-project/blob/d76f8fc4/mailbox/store/src/main/java/org/apache/james/mailbox/store/StoreMailboxManager.java
----------------------------------------------------------------------
diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/StoreMailboxManager.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/StoreMailboxManager.java
index 8bdebb2..818cc97 100644
--- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/StoreMailboxManager.java
+++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/StoreMailboxManager.java
@@ -26,6 +26,7 @@ import java.util.List;
 import java.util.Optional;
 import java.util.Random;
 import java.util.Set;
+
 import javax.annotation.PostConstruct;
 import javax.inject.Inject;
 
@@ -68,6 +69,7 @@ import org.apache.james.mailbox.store.event.DefaultDelegatingMailboxListener;
 import org.apache.james.mailbox.store.event.DelegatingMailboxListener;
 import org.apache.james.mailbox.store.event.MailboxAnnotationListener;
 import org.apache.james.mailbox.store.event.MailboxEventDispatcher;
+import org.apache.james.mailbox.store.extractor.DefaultTextExtractor;
 import org.apache.james.mailbox.store.mail.AnnotationMapper;
 import org.apache.james.mailbox.store.mail.MailboxMapper;
 import org.apache.james.mailbox.store.mail.model.Mailbox;
@@ -242,7 +244,7 @@ public class StoreMailboxManager implements MailboxManager {
         }
 
         if (index == null) {
-            index = new SimpleMessageSearchIndex(mailboxSessionMapperFactory, mailboxSessionMapperFactory);
+            index = new SimpleMessageSearchIndex(mailboxSessionMapperFactory, mailboxSessionMapperFactory, new DefaultTextExtractor());
         }
         if (index instanceof ListeningMessageSearchIndex) {
             this.addGlobalListener((MailboxListener) index, null);

http://git-wip-us.apache.org/repos/asf/james-project/blob/d76f8fc4/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/MessageSearches.java
----------------------------------------------------------------------
diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/MessageSearches.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/MessageSearches.java
index 87e6c24..d34c522 100644
--- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/MessageSearches.java
+++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/MessageSearches.java
@@ -35,11 +35,16 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Optional;
 import java.util.TimeZone;
+
 import javax.mail.Flags;
 
 import org.apache.james.mailbox.MessageUid;
 import org.apache.james.mailbox.exception.MailboxException;
 import org.apache.james.mailbox.exception.UnsupportedSearchException;
+import org.apache.james.mailbox.extractor.ParsedContent;
+import org.apache.james.mailbox.extractor.TextExtractor;
+import org.apache.james.mailbox.model.Attachment;
+import org.apache.james.mailbox.model.MessageAttachment;
 import org.apache.james.mailbox.model.MessageResult.Header;
 import org.apache.james.mailbox.model.SearchQuery;
 import org.apache.james.mailbox.model.SearchQuery.AddressType;
@@ -72,7 +77,9 @@ import org.apache.james.mime4j.utils.search.MessageMatcher;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.github.fge.lambdas.Throwing;
 import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
 
 /**
@@ -90,18 +97,14 @@ public class MessageSearches implements Iterable<SimpleMessageSearchIndex.Search
         .setMaxLineLen(-1)
         .build();
 
-    private Iterator<MailboxMessage> messages;
-    private SearchQuery query;
+    private final Iterator<MailboxMessage> messages;
+    private final SearchQuery query;
+    private final TextExtractor textExtractor;
 
-    public MessageSearches(Iterator<MailboxMessage> messages, SearchQuery query) {
+    public MessageSearches(Iterator<MailboxMessage> messages, SearchQuery query, TextExtractor textExtractor) {
         this.messages = messages;
         this.query = query;
-    }
-
-    /**
-     * Empty constructor only for tests (which test isMatch())
-     */
-    public MessageSearches() {
+        this.textExtractor = textExtractor;
     }
 
     @Override
@@ -207,6 +210,8 @@ public class MessageSearches implements Iterable<SimpleMessageSearchIndex.Search
                 return textContains(value, message);
             case FULL:
                 return messageContains(value, message);
+            case ATTACHMENTS:
+                return attachmentsContain(value, message);
             }
             throw new UnsupportedSearchException();
         } catch (IOException | MimeException e) {
@@ -239,6 +244,20 @@ public class MessageSearches implements Iterable<SimpleMessageSearchIndex.Search
         return isInMessage(value, new SequenceInputStream(textHeaders(message), bodyContent), true);
     }
 
+    private boolean attachmentsContain(String value, MailboxMessage message) throws IOException, MimeException {
+        List<MessageAttachment> attachments = message.getAttachments();
+        return isInAttachments(value, attachments);
+    }
+
+    private boolean isInAttachments(String value, List<MessageAttachment> attachments) {
+        return attachments.stream()
+            .map(MessageAttachment::getAttachment)
+            .map(Throwing.function((Attachment attachment) -> textExtractor.extractContent(attachment.getStream(), attachment.getType()))
+                    .orReturn(new ParsedContent(null, ImmutableMap.of())))
+            .map(ParsedContent::getTextualContent)
+            .anyMatch(string -> string.contains(value));
+    }
+
     private InputStream textHeaders(MailboxMessage message) throws MimeIOException, IOException {
         ByteArrayOutputStream out = new ByteArrayOutputStream();
         new DefaultMessageWriter()

http://git-wip-us.apache.org/repos/asf/james-project/blob/d76f8fc4/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SimpleMessageSearchIndex.java
----------------------------------------------------------------------
diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SimpleMessageSearchIndex.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SimpleMessageSearchIndex.java
index c2614de..9a301b4 100644
--- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SimpleMessageSearchIndex.java
+++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/SimpleMessageSearchIndex.java
@@ -24,12 +24,14 @@ import java.util.List;
 import java.util.SortedSet;
 import java.util.TreeSet;
 import java.util.stream.Stream;
+
 import javax.inject.Inject;
 
 import org.apache.james.mailbox.MailboxManager.SearchCapabilities;
 import org.apache.james.mailbox.MailboxSession;
 import org.apache.james.mailbox.MessageUid;
 import org.apache.james.mailbox.exception.MailboxException;
+import org.apache.james.mailbox.extractor.TextExtractor;
 import org.apache.james.mailbox.model.MailboxPath;
 import org.apache.james.mailbox.model.MessageId;
 import org.apache.james.mailbox.model.MessageRange;
@@ -64,16 +66,18 @@ public class SimpleMessageSearchIndex implements MessageSearchIndex {
 
     private final MessageMapperFactory messageMapperFactory;
     private final MailboxMapperFactory mailboxMapperFactory;
+    private final TextExtractor textExtractor;
     
     @Inject
-    public SimpleMessageSearchIndex(MessageMapperFactory messageMapperFactory, MailboxMapperFactory mailboxMapperFactory) {
+    public SimpleMessageSearchIndex(MessageMapperFactory messageMapperFactory, MailboxMapperFactory mailboxMapperFactory, TextExtractor textExtractor) {
         this.messageMapperFactory = messageMapperFactory;
         this.mailboxMapperFactory = mailboxMapperFactory;
+        this.textExtractor = textExtractor;
     }
     
     @Override
     public EnumSet<SearchCapabilities> getSupportedCapabilities() {
-        return EnumSet.of(SearchCapabilities.MultimailboxSearch, SearchCapabilities.Text);
+        return EnumSet.of(SearchCapabilities.MultimailboxSearch, SearchCapabilities.Text, SearchCapabilities.Attachment);
     }
     
     /**
@@ -132,7 +136,7 @@ public class SimpleMessageSearchIndex implements MessageSearchIndex {
             	hitSet.add(m);
             }
         }
-        return ImmutableList.copyOf(new MessageSearches(hitSet.iterator(), query).iterator());
+        return ImmutableList.copyOf(new MessageSearches(hitSet.iterator(), query, textExtractor).iterator());
     }
 
     private boolean isMatchingUser(MailboxSession session, Mailbox mailbox) {

http://git-wip-us.apache.org/repos/asf/james-project/blob/d76f8fc4/mailbox/store/src/test/java/org/apache/james/mailbox/store/SearchUtilsMultipartMixedTest.java
----------------------------------------------------------------------
diff --git a/mailbox/store/src/test/java/org/apache/james/mailbox/store/SearchUtilsMultipartMixedTest.java b/mailbox/store/src/test/java/org/apache/james/mailbox/store/SearchUtilsMultipartMixedTest.java
index d9f6117..6a5e8df 100644
--- a/mailbox/store/src/test/java/org/apache/james/mailbox/store/SearchUtilsMultipartMixedTest.java
+++ b/mailbox/store/src/test/java/org/apache/james/mailbox/store/SearchUtilsMultipartMixedTest.java
@@ -25,9 +25,11 @@ import static org.junit.Assert.assertTrue;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Iterator;
 import java.util.Locale;
 
 import org.apache.james.mailbox.MessageUid;
+import org.apache.james.mailbox.extractor.TextExtractor;
 import org.apache.james.mailbox.model.SearchQuery;
 import org.apache.james.mailbox.store.mail.model.MailboxMessage;
 import org.apache.james.mailbox.store.search.MessageSearches;
@@ -109,9 +111,9 @@ public class SearchUtilsMultipartMixedTest {
             + "\r\n--2.50290787509--\r\n" + "\r\n--1729--\r\n";
 
     MailboxMessage row;
-
-
     Collection<MessageUid> recent;
+    private MessageSearches messageSearches;
+
     @Before
     public void setUp() throws Exception {
         final MessageBuilder builder = new MessageBuilder();
@@ -124,100 +126,105 @@ public class SearchUtilsMultipartMixedTest {
         builder.body = Charset.forName("us-ascii").encode(BODY).array();
         row = builder.build();
         recent = new ArrayList<>();
+        
+        Iterator<MailboxMessage> messages = null;
+        SearchQuery query = null; 
+        TextExtractor textExtractor = null;
+        messageSearches = new MessageSearches(messages, query, textExtractor);
     }
     
 
     @Test
     public void testShouldNotFindWhatIsNotThere() throws Exception {
-        assertFalse(new MessageSearches().isMatch(SearchQuery.bodyContains("BOGUS"), row,
+        assertFalse(messageSearches.isMatch(SearchQuery.bodyContains("BOGUS"), row,
                 recent));
-        assertFalse(new MessageSearches().isMatch(SearchQuery.mailContains("BOGUS"), row,
+        assertFalse(messageSearches.isMatch(SearchQuery.mailContains("BOGUS"), row,
                 recent));
     }
 
     @Test
     public void testBodyShouldFindTextInBody() throws Exception {
-        assertTrue(new MessageSearches().isMatch(SearchQuery
+        assertTrue(messageSearches.isMatch(SearchQuery
                 .bodyContains(SAMPLE_INNER_MAIL_BODY_ONE), row, recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery.bodyContains(SAMPLE_PART_ONE),
+        assertTrue(messageSearches.isMatch(SearchQuery.bodyContains(SAMPLE_PART_ONE),
                 row, recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery.bodyContains(SAMPLE_PART_TWO),
+        assertTrue(messageSearches.isMatch(SearchQuery.bodyContains(SAMPLE_PART_TWO),
                 row, recent));
     }
 
     @Test
     public void testBodyShouldFindTextInBodyCaseInsensitive() throws Exception {
-        assertTrue(new MessageSearches().isMatch(SearchQuery
+        assertTrue(messageSearches.isMatch(SearchQuery
                 .bodyContains(SAMPLE_INNER_MAIL_BODY_ONE), row, recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery.bodyContains(SAMPLE_PART_ONE),
+        assertTrue(messageSearches.isMatch(SearchQuery.bodyContains(SAMPLE_PART_ONE),
                 row, recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery.bodyContains(SAMPLE_PART_TWO),
+        assertTrue(messageSearches.isMatch(SearchQuery.bodyContains(SAMPLE_PART_TWO),
                 row, recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery
+        assertTrue(messageSearches.isMatch(SearchQuery
                 .bodyContains(SAMPLE_INNER_MAIL_BODY_ONE.toLowerCase(Locale.US)), row,
                 recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery.bodyContains(SAMPLE_PART_ONE
+        assertTrue(messageSearches.isMatch(SearchQuery.bodyContains(SAMPLE_PART_ONE
                 .toLowerCase(Locale.US)), row, recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery.bodyContains(SAMPLE_PART_TWO
+        assertTrue(messageSearches.isMatch(SearchQuery.bodyContains(SAMPLE_PART_TWO
                 .toLowerCase(Locale.US)), row, recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery
+        assertTrue(messageSearches.isMatch(SearchQuery
                 .bodyContains(SAMPLE_INNER_MAIL_BODY_ONE.toUpperCase(Locale.US)), row,
                 recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery.bodyContains(SAMPLE_PART_ONE
+        assertTrue(messageSearches.isMatch(SearchQuery.bodyContains(SAMPLE_PART_ONE
                 .toUpperCase(Locale.US)), row, recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery.bodyContains(SAMPLE_PART_TWO
+        assertTrue(messageSearches.isMatch(SearchQuery.bodyContains(SAMPLE_PART_TWO
                 .toUpperCase(Locale.US)), row, recent));
     }
 
     @Test
     public void testBodyShouldNotFindTextInHeaders() throws Exception {
-        assertFalse(new MessageSearches().isMatch(SearchQuery
+        assertFalse(messageSearches.isMatch(SearchQuery
                 .bodyContains(SAMPLE_INNER_MAIL_FIELD), row, recent));
-        assertFalse(new MessageSearches().isMatch(SearchQuery
+        assertFalse(messageSearches.isMatch(SearchQuery
                 .bodyContains(SAMPLE_PART_TWO_FIELD), row, recent));
     }
 
     @Test
     public void testTextShouldFindTextInBody() throws Exception {
-        assertTrue(new MessageSearches().isMatch(SearchQuery
+        assertTrue(messageSearches.isMatch(SearchQuery
                 .mailContains(SAMPLE_INNER_MAIL_BODY_ONE), row, recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery.mailContains(SAMPLE_PART_ONE),
+        assertTrue(messageSearches.isMatch(SearchQuery.mailContains(SAMPLE_PART_ONE),
                 row, recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery.mailContains(SAMPLE_PART_TWO),
+        assertTrue(messageSearches.isMatch(SearchQuery.mailContains(SAMPLE_PART_TWO),
                 row, recent));
     }
 
     @Test
     public void testTextShouldFindTextInBodyCaseInsensitive() throws Exception {
-        assertTrue(new MessageSearches().isMatch(SearchQuery
+        assertTrue(messageSearches.isMatch(SearchQuery
                 .mailContains(SAMPLE_INNER_MAIL_BODY_ONE), row, recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery.mailContains(SAMPLE_PART_ONE),
+        assertTrue(messageSearches.isMatch(SearchQuery.mailContains(SAMPLE_PART_ONE),
                 row, recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery.mailContains(SAMPLE_PART_TWO),
+        assertTrue(messageSearches.isMatch(SearchQuery.mailContains(SAMPLE_PART_TWO),
                 row, recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery
+        assertTrue(messageSearches.isMatch(SearchQuery
                 .mailContains(SAMPLE_INNER_MAIL_BODY_ONE.toLowerCase(Locale.US)), row,
                 recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery.mailContains(SAMPLE_PART_ONE
+        assertTrue(messageSearches.isMatch(SearchQuery.mailContains(SAMPLE_PART_ONE
                 .toLowerCase(Locale.US)), row, recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery.mailContains(SAMPLE_PART_TWO
+        assertTrue(messageSearches.isMatch(SearchQuery.mailContains(SAMPLE_PART_TWO
                 .toLowerCase(Locale.US)), row, recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery
+        assertTrue(messageSearches.isMatch(SearchQuery
                 .mailContains(SAMPLE_INNER_MAIL_BODY_ONE.toUpperCase(Locale.US)), row,
                 recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery.mailContains(SAMPLE_PART_ONE
+        assertTrue(messageSearches.isMatch(SearchQuery.mailContains(SAMPLE_PART_ONE
                 .toUpperCase(Locale.US)), row, recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery.mailContains(SAMPLE_PART_TWO
+        assertTrue(messageSearches.isMatch(SearchQuery.mailContains(SAMPLE_PART_TWO
                 .toUpperCase(Locale.US)), row, recent));
     }
 
     @Test
     public void testTextShouldFindTextInHeaders() throws Exception {
-        assertTrue(new MessageSearches().isMatch(SearchQuery
+        assertTrue(messageSearches.isMatch(SearchQuery
                 .mailContains(SAMPLE_INNER_MAIL_FIELD), row, recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery
+        assertTrue(messageSearches.isMatch(SearchQuery
                 .mailContains(SAMPLE_INNER_MAIL_BODY_ONE), row, recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery
+        assertTrue(messageSearches.isMatch(SearchQuery
                 .mailContains(SAMPLE_PART_TWO_FIELD), row, recent));
     }
 }

http://git-wip-us.apache.org/repos/asf/james-project/blob/d76f8fc4/mailbox/store/src/test/java/org/apache/james/mailbox/store/SearchUtilsRFC822Test.java
----------------------------------------------------------------------
diff --git a/mailbox/store/src/test/java/org/apache/james/mailbox/store/SearchUtilsRFC822Test.java b/mailbox/store/src/test/java/org/apache/james/mailbox/store/SearchUtilsRFC822Test.java
index 8f8d9d3..32ff7c8 100644
--- a/mailbox/store/src/test/java/org/apache/james/mailbox/store/SearchUtilsRFC822Test.java
+++ b/mailbox/store/src/test/java/org/apache/james/mailbox/store/SearchUtilsRFC822Test.java
@@ -25,9 +25,11 @@ import static org.junit.Assert.assertTrue;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Iterator;
 import java.util.Locale;
 
 import org.apache.james.mailbox.MessageUid;
+import org.apache.james.mailbox.extractor.TextExtractor;
 import org.apache.james.mailbox.model.SearchQuery;
 import org.apache.james.mailbox.store.mail.model.MailboxMessage;
 import org.apache.james.mailbox.store.search.MessageSearches;
@@ -52,6 +54,8 @@ public class SearchUtilsRFC822Test {
 
     Collection<MessageUid> recent;
 
+    private MessageSearches messageSearches;
+
     @Before
     public void setUp() throws Exception {
         recent = new ArrayList<>();
@@ -62,58 +66,63 @@ public class SearchUtilsRFC822Test {
         builder.header("Date", "Thu, 14 Feb 2008 12:00:00 +0000 (GMT)");
         builder.body = Charset.forName("us-ascii").encode(BODY).array();
         row = builder.build();
+        
+        Iterator<MailboxMessage> messages = null;
+        SearchQuery query = null; 
+        TextExtractor textExtractor = null;
+        messageSearches = new MessageSearches(messages, query, textExtractor);
     }
 
 
     @Test
     public void testBodyShouldMatchPhraseInBody() throws Exception {
-        assertTrue(new MessageSearches().isMatch(SearchQuery.bodyContains(CUSTARD), row,
+        assertTrue(messageSearches.isMatch(SearchQuery.bodyContains(CUSTARD), row,
                 recent));
-        assertFalse(new MessageSearches().isMatch(SearchQuery
+        assertFalse(messageSearches.isMatch(SearchQuery
                 .bodyContains(CUSTARD + CUSTARD), row, recent));
     }
 
     @Test
     public void testBodyMatchShouldBeCaseInsensitive() throws Exception {
-        assertTrue(new MessageSearches().isMatch(SearchQuery.bodyContains(RHUBARD), row,
+        assertTrue(messageSearches.isMatch(SearchQuery.bodyContains(RHUBARD), row,
                 recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery.bodyContains(RHUBARD
+        assertTrue(messageSearches.isMatch(SearchQuery.bodyContains(RHUBARD
                 .toLowerCase(Locale.US)), row, recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery.bodyContains(RHUBARD
+        assertTrue(messageSearches.isMatch(SearchQuery.bodyContains(RHUBARD
                 .toLowerCase(Locale.US)), row, recent));
     }
 
     @Test
     public void testBodyShouldNotMatchPhraseOnlyInHeader() throws Exception {
-        assertFalse(new MessageSearches().isMatch(SearchQuery.bodyContains(FROM_ADDRESS),
+        assertFalse(messageSearches.isMatch(SearchQuery.bodyContains(FROM_ADDRESS),
                 row, recent));
-        assertFalse(new MessageSearches().isMatch(SearchQuery.bodyContains(SUBJECT_PART),
+        assertFalse(messageSearches.isMatch(SearchQuery.bodyContains(SUBJECT_PART),
                 row, recent));
     }
 
     @Test
     public void testTextShouldMatchPhraseInBody() throws Exception {
-        assertTrue(new MessageSearches().isMatch(SearchQuery.mailContains(CUSTARD), row,
+        assertTrue(messageSearches.isMatch(SearchQuery.mailContains(CUSTARD), row,
                 recent));
-        assertFalse(new MessageSearches().isMatch(SearchQuery
+        assertFalse(messageSearches.isMatch(SearchQuery
                 .mailContains(CUSTARD + CUSTARD), row, recent));
     }
 
     @Test
     public void testTextMatchShouldBeCaseInsensitive() throws Exception {
-        assertTrue(new MessageSearches().isMatch(SearchQuery.mailContains(RHUBARD), row,
+        assertTrue(messageSearches.isMatch(SearchQuery.mailContains(RHUBARD), row,
                 recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery.mailContains(RHUBARD
+        assertTrue(messageSearches.isMatch(SearchQuery.mailContains(RHUBARD
                 .toLowerCase(Locale.US)), row, recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery.mailContains(RHUBARD
+        assertTrue(messageSearches.isMatch(SearchQuery.mailContains(RHUBARD
                 .toLowerCase(Locale.US)), row, recent));
     }
 
     @Test
     public void testBodyShouldMatchPhraseOnlyInHeader() throws Exception {
-        assertTrue(new MessageSearches().isMatch(SearchQuery.mailContains(FROM_ADDRESS),
+        assertTrue(messageSearches.isMatch(SearchQuery.mailContains(FROM_ADDRESS),
                 row, recent));
-        assertTrue(new MessageSearches().isMatch(SearchQuery.mailContains(SUBJECT_PART),
+        assertTrue(messageSearches.isMatch(SearchQuery.mailContains(SUBJECT_PART),
                 row, recent));
     }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscribe@james.apache.org
For additional commands, e-mail: server-dev-help@james.apache.org