You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@james.apache.org by rc...@apache.org on 2020/12/23 03:51:15 UTC

[james-project] 01/10: MAILBOX-403 Email main body is also indexed as an attachment

This is an automated email from the ASF dual-hosted git repository.

rcordier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git

commit 22b5bbac6abf7f47c1cb00d0f971da1c3df9606f
Author: Benoit Tellier <bt...@linagora.com>
AuthorDate: Mon Dec 21 18:40:37 2020 +0700

    MAILBOX-403 Email main body is also indexed as an attachment
---
 .../james/mailbox/elasticsearch/json/MimePart.java |  3 +-
 .../search/AbstractMessageSearchIndexTest.java     | 15 ++++++++++
 mailbox/store/src/test/resources/eml/htmlMail.json | 19 +------------
 .../store/src/test/resources/eml/nonTextual.json   |  8 ------
 .../src/test/resources/eml/recursiveMail.json      | 32 ----------------------
 5 files changed, 18 insertions(+), 59 deletions(-)

diff --git a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java
index 6f94526..e283267 100644
--- a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java
+++ b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java
@@ -303,7 +303,8 @@ public class MimePart {
     @JsonIgnore
     public Stream<MimePart> getAttachmentsStream() {
         return attachments.stream()
-                .flatMap((mimePart) -> Stream.concat(Stream.of(mimePart), mimePart.getAttachmentsStream()));
+                .flatMap(mimePart -> Stream.concat(Stream.of(mimePart), mimePart.getAttachmentsStream()))
+                .filter(mimePart -> mimePart.contentDisposition.isPresent());
     }
 
 }
diff --git a/mailbox/store/src/test/java/org/apache/james/mailbox/store/search/AbstractMessageSearchIndexTest.java b/mailbox/store/src/test/java/org/apache/james/mailbox/store/search/AbstractMessageSearchIndexTest.java
index b0500ef..9492958 100644
--- a/mailbox/store/src/test/java/org/apache/james/mailbox/store/search/AbstractMessageSearchIndexTest.java
+++ b/mailbox/store/src/test/java/org/apache/james/mailbox/store/search/AbstractMessageSearchIndexTest.java
@@ -1273,6 +1273,21 @@ public abstract class AbstractMessageSearchIndexTest {
     }
 
     @Test
+    void searchWithTextAttachmentShouldNotMatchMessageBody() throws Exception {
+        assumeTrue(storeMailboxManager.getSupportedSearchCapabilities().contains(MailboxManager.SearchCapabilities.Attachment));
+        myFolderMessageManager.appendMessage(
+            MessageManager.AppendCommand.builder()
+                .build(ClassLoader.getSystemResourceAsStream("eml/emailWithTextAttachment.eml")),
+            session).getId();
+        await();
+
+        SearchQuery searchQuery = SearchQuery.of(SearchQuery.attachmentContains("message"));
+
+        assertThat(messageSearchIndex.search(session, mailbox2, searchQuery).toStream())
+            .isEmpty();
+    }
+
+    @Test
     void searchWithPDFAttachmentShouldReturnMailsWhenAttachmentContentMatches() throws Exception {
         assumeTrue(storeMailboxManager.getSupportedSearchCapabilities().contains(MailboxManager.SearchCapabilities.Attachment));
         byte[] attachmentContent = ClassLoaderUtils.getSystemResourceAsByteArray("eml/attachment.pdf");
diff --git a/mailbox/store/src/test/resources/eml/htmlMail.json b/mailbox/store/src/test/resources/eml/htmlMail.json
index 45445d2..85bb47a 100644
--- a/mailbox/store/src/test/resources/eml/htmlMail.json
+++ b/mailbox/store/src/test/resources/eml/htmlMail.json
@@ -115,24 +115,7 @@
     "Text and Html not similar"
   ],
   "sentDate":"2015-06-04T07:36:08+0000",
-  "attachments":[
-    {
-      "textContent":"The text/plain part is not matching the html one.\r\n",
-      "mediaType":"text",
-      "subtype":"plain",
-      "fileName":null,
-      "fileExtension":null,
-      "contentDisposition":null
-    },
-    {
-      "textContent":"<html>\r\n  <head>\r\n\r\n    <meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n  </head>\r\n  <body bgcolor=\"#FFFFFF\" text=\"#000000\">\r\n    <i>This is a mail with <b>beautifull</b> html content which contains a banana.</i><br>\r\n  </body>\r\n</html>\r\n",
-      "mediaType":"text",
-      "subtype":"html",
-      "fileName":null,
-      "fileExtension":null,
-      "contentDisposition":null
-    }
-  ],
+  "attachments":[],
   "textBody": "The text/plain part is not matching the html one.\r\n",
   "htmlBody": "<html>\r\n  <head>\r\n\r\n    <meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n  </head>\r\n  <body bgcolor=\"#FFFFFF\" text=\"#000000\">\r\n    <i>This is a mail with <b>beautifull</b> html content which contains a banana.</i><br>\r\n  </body>\r\n</html>\r\n",
   "isDeleted":true,
diff --git a/mailbox/store/src/test/resources/eml/nonTextual.json b/mailbox/store/src/test/resources/eml/nonTextual.json
index 121434e..5fc73cf 100644
--- a/mailbox/store/src/test/resources/eml/nonTextual.json
+++ b/mailbox/store/src/test/resources/eml/nonTextual.json
@@ -1,14 +1,6 @@
 {
   "attachments": [
     {
-      "mediaType":"text",
-      "subtype":"plain",
-      "fileName":null,
-      "fileExtension":null,
-      "contentDisposition":null,
-      "textContent":"This mail have a non textual attachment !\r\n"
-    },
-    {
       "mediaType":"application",
       "subtype":"vnd.oasis.opendocument.text",
       "fileName":"toto.odt","fileExtension":"odt",
diff --git a/mailbox/store/src/test/resources/eml/recursiveMail.json b/mailbox/store/src/test/resources/eml/recursiveMail.json
index e59b266..73121e0 100644
--- a/mailbox/store/src/test/resources/eml/recursiveMail.json
+++ b/mailbox/store/src/test/resources/eml/recursiveMail.json
@@ -78,38 +78,6 @@
   "sentDate": "2015-06-10T10:45:27+02:00",
   "attachments": [
     {
-      "textContent": "Forward as attachment !\n\n\n",
-      "mediaType": "text",
-      "subtype": "plain",
-      "fileName": null,
-      "fileExtension": null,
-      "contentDisposition": null
-    },
-    {
-      "textContent": null,
-      "mediaType": null,
-      "subtype": null,
-      "fileName": null,
-      "fileExtension": null,
-      "contentDisposition": null
-    },
-    {
-      "textContent": null,
-      "mediaType": null,
-      "subtype": null,
-      "fileName": null,
-      "fileExtension": null,
-      "contentDisposition": null
-    },
-    {
-      "textContent": "Textual content of one attachment ( mail signature )\n",
-      "mediaType": "text",
-      "subtype": "plain",
-      "fileName": null,
-      "fileExtension": null,
-      "contentDisposition": null
-    },
-    {
       "textContent": null,
       "mediaType": "application",
       "subtype": "vnd.oasis.opendocument.spreadsheet",


---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscribe@james.apache.org
For additional commands, e-mail: notifications-help@james.apache.org