You are viewing a plain text version of this content. The canonical link for it is here.
Posted to server-dev@james.apache.org by bt...@apache.org on 2017/02/15 00:01:25 UTC
[07/13] james-project git commit: JAMES-1934 implements hasAttachment
search for ES
JAMES-1934 implements hasAttachment search for ES
Project: http://git-wip-us.apache.org/repos/asf/james-project/repo
Commit: http://git-wip-us.apache.org/repos/asf/james-project/commit/c1357bca
Tree: http://git-wip-us.apache.org/repos/asf/james-project/tree/c1357bca
Diff: http://git-wip-us.apache.org/repos/asf/james-project/diff/c1357bca
Branch: refs/heads/master
Commit: c1357bca59a99a5e422068782fef24f5195b86e8
Parents: 59cfd84
Author: Luc DUZAN <ld...@linagora.com>
Authored: Thu Feb 9 12:56:53 2017 +0100
Committer: Benoit Tellier <bt...@linagora.com>
Committed: Wed Feb 15 06:59:45 2017 +0700
----------------------------------------------------------------------
.../elasticsearch/MailboxMappingFactory.java | 4 +
.../elasticsearch/json/IndexableMessage.java | 24 ++++--
.../json/IndexableMessageWithMessageId.java | 6 +-
.../elasticsearch/query/CriterionConverter.java | 6 ++
.../json/IndexableMessageTest.java | 80 +++++++++++++++++++-
.../src/test/resources/eml/Toto.eml | 41 ----------
.../store/src/test/resources/eml/htmlMail.json | 2 +-
.../src/test/resources/eml/nonTextual.json | 2 +-
.../store/src/test/resources/eml/spamMail.json | 2 +-
9 files changed, 111 insertions(+), 56 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/james-project/blob/c1357bca/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/MailboxMappingFactory.java
----------------------------------------------------------------------
diff --git a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/MailboxMappingFactory.java b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/MailboxMappingFactory.java
index aaa0009..7c54b40 100644
--- a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/MailboxMappingFactory.java
+++ b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/MailboxMappingFactory.java
@@ -212,6 +212,10 @@ public class MailboxMappingFactory {
.endObject()
.endObject()
+ .startObject(JsonMessageConstants.HAS_ATTACHMENT)
+ .field(NodeMappingFactory.TYPE, NodeMappingFactory.BOOLEAN)
+ .endObject()
+
.startObject(JsonMessageConstants.TEXT)
.field(NodeMappingFactory.TYPE, NodeMappingFactory.STRING)
.field(NodeMappingFactory.ANALYZER, NodeMappingFactory.SNOWBALL)
http://git-wip-us.apache.org/repos/asf/james-project/blob/c1357bca/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java
----------------------------------------------------------------------
diff --git a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java
index 770f558..6150b19 100644
--- a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java
+++ b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessage.java
@@ -34,6 +34,8 @@ import org.apache.james.mailbox.elasticsearch.query.DateResolutionFormater;
import org.apache.james.mailbox.extractor.TextExtractor;
import org.apache.james.mailbox.store.mail.model.MailboxMessage;
import org.apache.james.mailbox.store.mail.model.Property;
+import org.apache.james.mailbox.store.mail.model.impl.PropertyBuilder;
+import org.apache.james.mailbox.store.mail.model.impl.SimpleProperty;
import org.apache.james.mime4j.MimeException;
import com.fasterxml.jackson.annotation.JsonProperty;
@@ -46,6 +48,8 @@ import com.google.common.collect.Multimap;
public class IndexableMessage {
+ public static final SimpleProperty HAS_ATTACHMENT_PROPERTY = new SimpleProperty(PropertyBuilder.JAMES_INTERNALS, PropertyBuilder.HAS_ATTACHMENT, "true");
+
public static IndexableMessage from(MailboxMessage message, List<User> users, TextExtractor textExtractor,
ZoneId zoneId, IndexAttachments indexAttachments) {
@@ -57,6 +61,9 @@ public class IndexableMessage {
indexableMessage.users = users.stream().map(User::getUserName).collect(Guavate.toImmutableList());
indexableMessage.bodyText = parsingResult.locateFirstTextBody();
indexableMessage.bodyHtml = parsingResult.locateFirstHtmlBody();
+ indexableMessage.hasAttachment = message.getProperties()
+ .stream()
+ .anyMatch(property -> property.equals(HAS_ATTACHMENT_PROPERTY));
indexableMessage.setFlattenedAttachments(parsingResult, indexAttachments);
indexableMessage.copyHeaderFields(parsingResult.getHeaderCollection(), getSanitizedInternalDate(message, zoneId));
indexableMessage.generateText();
@@ -68,11 +75,13 @@ public class IndexableMessage {
}
private void setFlattenedAttachments(MimePart parsingResult, IndexAttachments indexAttachments) {
- if (indexAttachments.equals(IndexAttachments.YES)) {
- attachments = parsingResult.getAttachmentsStream()
- .collect(Collectors.toList());
+ List<MimePart> mimeparts = parsingResult.getAttachmentsStream()
+ .collect(Guavate.toImmutableList());
+
+ if (IndexAttachments.YES.equals(indexAttachments)) {
+ this.attachments = mimeparts;
} else {
- attachments = ImmutableList.of();
+ this.attachments = ImmutableList.of();
}
}
@@ -134,6 +143,7 @@ public class IndexableMessage {
private String date;
private String mediaType;
private String subType;
+ private boolean hasAttachment;
private boolean isUnRead;
private boolean isRecent;
private boolean isFlagged;
@@ -155,11 +165,12 @@ public class IndexableMessage {
private Optional<String> bodyHtml;
private String text;
+
public IndexableMessage(long uid, String mailboxId, List<String> users, long modSeq, long size, String date, String mediaType,
String subType, boolean isUnRead, boolean isRecent, boolean isFlagged, boolean isDeleted, boolean isDraft,
boolean isAnswered, String[] userFlags, Multimap<String, String> headers, EMailers from, EMailers to,
EMailers cc, EMailers bcc, EMailers replyTo, Subjects subjects, String sentDate, List<Property> properties,
- List<MimePart> attachments, Optional<String> bodyText, Optional<String> bodyHtml, String text) {
+ List<MimePart> attachments, boolean hasAttachment, Optional<String> bodyText, Optional<String> bodyHtml, String text) {
this.uid = uid;
this.mailboxId = mailboxId;
this.users = users;
@@ -185,6 +196,7 @@ public class IndexableMessage {
this.sentDate = sentDate;
this.properties = properties;
this.attachments = attachments;
+ this.hasAttachment = hasAttachment;
this.bodyText = bodyText;
this.bodyHtml = bodyHtml;
this.text = text;
@@ -330,7 +342,7 @@ public class IndexableMessage {
@JsonProperty(JsonMessageConstants.HAS_ATTACHMENT)
public boolean getHasAttachment() {
- return attachments.size() > 0;
+ return hasAttachment;
}
@JsonProperty(JsonMessageConstants.TEXT)
http://git-wip-us.apache.org/repos/asf/james-project/blob/c1357bca/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageWithMessageId.java
----------------------------------------------------------------------
diff --git a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageWithMessageId.java b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageWithMessageId.java
index 8f376dc..2578580 100644
--- a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageWithMessageId.java
+++ b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageWithMessageId.java
@@ -43,7 +43,7 @@ public class IndexableMessageWithMessageId extends IndexableMessage {
indexableMessage.isDeleted(), indexableMessage.isDraft(), indexableMessage.isAnswered(), indexableMessage.getUserFlags(),
indexableMessage.getHeaders(), indexableMessage.getFrom(), indexableMessage.getTo(), indexableMessage.getCc(), indexableMessage.getBcc(),
indexableMessage.getReplyTo(), indexableMessage.getSubjects(), indexableMessage.getSentDate(), indexableMessage.getProperties(),
- indexableMessage.getAttachments(), indexableMessage.getBodyText(), indexableMessage.getBodyHtml(), indexableMessage.getText(),
+ indexableMessage.getAttachments(), indexableMessage.getHasAttachment(), indexableMessage.getBodyText(), indexableMessage.getBodyHtml(), indexableMessage.getText(),
message.getMessageId().serialize());
}
@@ -53,10 +53,10 @@ public class IndexableMessageWithMessageId extends IndexableMessage {
String mediaType, String subType, boolean isUnRead, boolean isRecent, boolean isFlagged,
boolean isDeleted, boolean isDraft, boolean isAnswered, String[] userFlags, Multimap<String, String> headers,
EMailers from, EMailers to, EMailers cc, EMailers bcc, EMailers replyTo, Subjects subjects,
- String sentDate, List<Property> properties, List<MimePart> attachments, Optional<String> bodyText,
+ String sentDate, List<Property> properties, List<MimePart> attachments, boolean hasAttachments, Optional<String> bodyText,
Optional<String> bodyHtml, String text, String messageId) {
super(uid, mailboxId, users, modSeq, size, date, mediaType, subType, isUnRead, isRecent, isFlagged, isDeleted,
- isDraft, isAnswered, userFlags, headers, from, to, cc, bcc, replyTo, subjects, sentDate, properties, attachments,
+ isDraft, isAnswered, userFlags, headers, from, to, cc, bcc, replyTo, subjects, sentDate, properties, attachments, hasAttachments,
bodyText, bodyHtml, text);
this.messageId = messageId;
}
http://git-wip-us.apache.org/repos/asf/james-project/blob/c1357bca/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/query/CriterionConverter.java
----------------------------------------------------------------------
diff --git a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/query/CriterionConverter.java b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/query/CriterionConverter.java
index 3904a92..dcc4ff6 100644
--- a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/query/CriterionConverter.java
+++ b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/query/CriterionConverter.java
@@ -78,6 +78,8 @@ public class CriterionConverter {
registerCriterionConverter(SearchQuery.InternalDateCriterion.class,
criterion -> dateRangeFilter(JsonMessageConstants.DATE, criterion.getOperator()));
+
+ registerCriterionConverter(SearchQuery.AttachmentCriterion.class, this::convertAttachmentCriterion);
}
@SuppressWarnings("unchecked")
@@ -115,6 +117,10 @@ public class CriterionConverter {
return criterionConverterMap.get(criterion.getClass()).apply(criterion);
}
+ private QueryBuilder convertAttachmentCriterion(SearchQuery.AttachmentCriterion criterion) {
+ return termQuery(JsonMessageConstants.HAS_ATTACHMENT, criterion.getOperator().isSet());
+ }
+
private QueryBuilder convertCustomFlagCriterion(SearchQuery.CustomFlagCriterion criterion) {
QueryBuilder termQueryBuilder = termQuery(JsonMessageConstants.USER_FLAGS, criterion.getFlag());
if (criterion.getOperator().isSet()) {
http://git-wip-us.apache.org/repos/asf/james-project/blob/c1357bca/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java
----------------------------------------------------------------------
diff --git a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java
index cf98e6e..a66f326 100644
--- a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java
+++ b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java
@@ -20,11 +20,12 @@
package org.apache.james.mailbox.elasticsearch.json;
import static org.assertj.core.api.Assertions.assertThat;
-import static org.mockito.Mockito.any;
+import static org.mockito.Matchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import java.io.ByteArrayInputStream;
+import java.io.IOException;
import java.time.ZoneId;
import javax.mail.Flags;
@@ -38,6 +39,8 @@ import org.apache.james.mailbox.mock.MockMailboxSession;
import org.apache.james.mailbox.model.TestId;
import org.apache.james.mailbox.store.extractor.DefaultTextExtractor;
import org.apache.james.mailbox.store.mail.model.MailboxMessage;
+import org.apache.james.mailbox.store.mail.model.impl.PropertyBuilder;
+import org.apache.james.mailbox.store.mail.model.impl.SimpleProperty;
import org.apache.james.mailbox.tika.extractor.TikaTextExtractor;
import org.junit.Test;
@@ -209,6 +212,77 @@ public class IndexableMessageTest {
}
@Test
+ public void hasAttachmentsShouldReturnTrueWhenPropertyIsPresentAndTrue() throws IOException {
+ //Given
+ MailboxMessage mailboxMessage = mock(MailboxMessage.class);
+ TestId mailboxId = TestId.of(1);
+ when(mailboxMessage.getMailboxId())
+ .thenReturn(mailboxId);
+ when(mailboxMessage.getFullContent())
+ .thenReturn(new ByteArrayInputStream(IOUtils.toByteArray(ClassLoader.getSystemResourceAsStream("eml/mailWithHeaders.eml"))));
+ when(mailboxMessage.createFlags())
+ .thenReturn(new Flags());
+ when(mailboxMessage.getUid())
+ .thenReturn(MESSAGE_UID);
+ when(mailboxMessage.getProperties()).thenReturn(ImmutableList.of(IndexableMessage.HAS_ATTACHMENT_PROPERTY));
+
+ // When
+ IndexableMessage indexableMessage = IndexableMessage.from(mailboxMessage, ImmutableList.of(new MockMailboxSession("username").getUser()),
+ new DefaultTextExtractor(), ZoneId.of("Europe/Paris"), IndexAttachments.YES);
+
+ // Then
+ assertThat(indexableMessage.getHasAttachment()).isTrue();
+ }
+
+ @Test
+ public void hasAttachmentsShouldReturnFalseWhenPropertyIsPresentButFalse() throws IOException {
+ //Given
+ MailboxMessage mailboxMessage = mock(MailboxMessage.class);
+ TestId mailboxId = TestId.of(1);
+ when(mailboxMessage.getMailboxId())
+ .thenReturn(mailboxId);
+ when(mailboxMessage.getFullContent())
+ .thenReturn(new ByteArrayInputStream(IOUtils.toByteArray(ClassLoader.getSystemResourceAsStream("eml/mailWithHeaders.eml"))));
+ when(mailboxMessage.createFlags())
+ .thenReturn(new Flags());
+ when(mailboxMessage.getUid())
+ .thenReturn(MESSAGE_UID);
+ when(mailboxMessage.getProperties())
+ .thenReturn(ImmutableList.of(new SimpleProperty(PropertyBuilder.JAMES_INTERNALS, PropertyBuilder.HAS_ATTACHMENT, "false")));
+
+ // When
+ IndexableMessage indexableMessage = IndexableMessage.from(mailboxMessage, ImmutableList.of(new MockMailboxSession("username").getUser()),
+ new DefaultTextExtractor(), ZoneId.of("Europe/Paris"), IndexAttachments.YES);
+
+ // Then
+ assertThat(indexableMessage.getHasAttachment()).isFalse();
+ }
+
+ @Test
+ public void hasAttachmentsShouldReturnFalseWhenPropertyIsAbsent() throws IOException {
+ //Given
+ MailboxMessage mailboxMessage = mock(MailboxMessage.class);
+ TestId mailboxId = TestId.of(1);
+ when(mailboxMessage.getMailboxId())
+ .thenReturn(mailboxId);
+ when(mailboxMessage.getFullContent())
+ .thenReturn(new ByteArrayInputStream(IOUtils.toByteArray(ClassLoader.getSystemResourceAsStream("eml/mailWithHeaders.eml"))));
+ when(mailboxMessage.createFlags())
+ .thenReturn(new Flags());
+ when(mailboxMessage.getUid())
+ .thenReturn(MESSAGE_UID);
+ when(mailboxMessage.getProperties())
+ .thenReturn(ImmutableList.of());
+
+ // When
+ IndexableMessage indexableMessage = IndexableMessage.from(mailboxMessage, ImmutableList.of(new MockMailboxSession("username").getUser()),
+ new DefaultTextExtractor(), ZoneId.of("Europe/Paris"), IndexAttachments.YES);
+
+ // Then
+ assertThat(indexableMessage.getHasAttachment()).isFalse();
+ }
+
+ @Test
public void attachmentsShouldNotBeenIndexedWhenAsked() throws Exception {
//Given
MailboxMessage mailboxMessage = mock(MailboxMessage.class);
@@ -216,7 +290,7 @@ public class IndexableMessageTest {
when(mailboxMessage.getMailboxId())
.thenReturn(mailboxId);
when(mailboxMessage.getFullContent())
- .thenReturn(new ByteArrayInputStream(IOUtils.toByteArray(ClassLoader.getSystemResourceAsStream("eml/Toto.eml"))));
+ .thenReturn(new ByteArrayInputStream(IOUtils.toByteArray(ClassLoader.getSystemResourceAsStream("eml/mailWithHeaders.eml"))));
when(mailboxMessage.createFlags())
.thenReturn(new Flags());
when(mailboxMessage.getUid())
@@ -238,7 +312,7 @@ public class IndexableMessageTest {
when(mailboxMessage.getMailboxId())
.thenReturn(mailboxId);
when(mailboxMessage.getFullContent())
- .thenReturn(new ByteArrayInputStream(IOUtils.toByteArray(ClassLoader.getSystemResourceAsStream("eml/Toto.eml"))));
+ .thenReturn(new ByteArrayInputStream(IOUtils.toByteArray(ClassLoader.getSystemResourceAsStream("eml/emailWith3Attachments.eml"))));
when(mailboxMessage.createFlags())
.thenReturn(new Flags());
when(mailboxMessage.getUid())
http://git-wip-us.apache.org/repos/asf/james-project/blob/c1357bca/mailbox/elasticsearch/src/test/resources/eml/Toto.eml
----------------------------------------------------------------------
diff --git a/mailbox/elasticsearch/src/test/resources/eml/Toto.eml b/mailbox/elasticsearch/src/test/resources/eml/Toto.eml
deleted file mode 100644
index ab2de03..0000000
--- a/mailbox/elasticsearch/src/test/resources/eml/Toto.eml
+++ /dev/null
@@ -1,41 +0,0 @@
-Return-Path: <lr...@linagora.com>
-Received: from alderaan.linagora.com (smtp.linagora.dc1 [172.16.18.53])
- by imap (Cyrus v2.2.13-Debian-2.2.13-19+squeeze3) with LMTPA;
- Tue, 29 Nov 2016 13:57:56 +0100
-X-Sieve: CMU Sieve 2.2
-Received: from [10.69.0.146] (mne69-10-88-173-78-196.fbx.proxad.net [88.173.78.196])
- (using TLSv1 with cipher DHE-RSA-AES128-SHA (128/128 bits))
- (No client certificate requested)
- by alderaan.linagora.com (Postfix) with ESMTPSA id CB0233783
- for <lr...@linagora.com>; Tue, 29 Nov 2016 13:57:56 +0100 (CET)
-To: Laura ROYET <lr...@linagora.com>
-From: Laura Royet <lr...@linagora.com>
-Subject: Toto
-Message-ID: <25...@linagora.com>
-Date: Tue, 29 Nov 2016 13:57:56 +0100
-User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101
- Thunderbird/45.5.0
-MIME-Version: 1.0
-Content-Type: multipart/mixed;
- boundary="------------3F646081DC313215FD6847F4"
-
-This is a multi-part message in MIME format.
---------------3F646081DC313215FD6847F4
-Content-Type: text/plain; charset=utf-8; format=flowed
-Content-Transfer-Encoding: 7bit
-
-
-
---
-Laura Royet
-
-
---------------3F646081DC313215FD6847F4
-Content-Type: text/plain; charset=UTF-8;
- name="Toto.txt"
-Content-Transfer-Encoding: base64
-Content-Disposition: attachment;
- filename="Toto.txt"
-
-VG90bwpDb3B5cmlnaHQgwqkgMjAxNiBMSU5BR09SQSAKQ0MgQlktU0EsIEdOVSBGREwK
---------------3F646081DC313215FD6847F4--
http://git-wip-us.apache.org/repos/asf/james-project/blob/c1357bca/mailbox/store/src/test/resources/eml/htmlMail.json
----------------------------------------------------------------------
diff --git a/mailbox/store/src/test/resources/eml/htmlMail.json b/mailbox/store/src/test/resources/eml/htmlMail.json
index 1655f5e..a8998ef 100644
--- a/mailbox/store/src/test/resources/eml/htmlMail.json
+++ b/mailbox/store/src/test/resources/eml/htmlMail.json
@@ -159,7 +159,7 @@
"isAnswered":false,
"isFlagged":false,
"isRecent":false,
- "hasAttachment":true,
+ "hasAttachment":false,
"isUnread":false,
"users": [
"username"
http://git-wip-us.apache.org/repos/asf/james-project/blob/c1357bca/mailbox/store/src/test/resources/eml/nonTextual.json
----------------------------------------------------------------------
diff --git a/mailbox/store/src/test/resources/eml/nonTextual.json b/mailbox/store/src/test/resources/eml/nonTextual.json
index 9d1aa9c..e44e2d8 100644
--- a/mailbox/store/src/test/resources/eml/nonTextual.json
+++ b/mailbox/store/src/test/resources/eml/nonTextual.json
@@ -154,7 +154,7 @@
"isDraft":false,
"isFlagged":false,
"isRecent":false,
- "hasAttachment":true,
+ "hasAttachment":false,
"isUnread":true,
"users": [
"username"
http://git-wip-us.apache.org/repos/asf/james-project/blob/c1357bca/mailbox/store/src/test/resources/eml/spamMail.json
----------------------------------------------------------------------
diff --git a/mailbox/store/src/test/resources/eml/spamMail.json b/mailbox/store/src/test/resources/eml/spamMail.json
index b71a5ce..388aa2d 100644
--- a/mailbox/store/src/test/resources/eml/spamMail.json
+++ b/mailbox/store/src/test/resources/eml/spamMail.json
@@ -198,7 +198,7 @@
"isDraft": false,
"isFlagged": false,
"isRecent": false,
- "hasAttachment": true,
+ "hasAttachment": false,
"isUnread": true,
"users": [
"username"
---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscribe@james.apache.org
For additional commands, e-mail: server-dev-help@james.apache.org