You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@james.apache.org by rc...@apache.org on 2020/12/23 03:51:15 UTC
[james-project] 01/10: MAILBOX-403 Email main body is also indexed as an attachment
This is an automated email from the ASF dual-hosted git repository.
rcordier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git
commit 22b5bbac6abf7f47c1cb00d0f971da1c3df9606f
Author: Benoit Tellier <bt...@linagora.com>
AuthorDate: Mon Dec 21 18:40:37 2020 +0700
MAILBOX-403 Email main body is also indexed as an attachment
---
.../james/mailbox/elasticsearch/json/MimePart.java | 3 +-
.../search/AbstractMessageSearchIndexTest.java | 15 ++++++++++
mailbox/store/src/test/resources/eml/htmlMail.json | 19 +------------
.../store/src/test/resources/eml/nonTextual.json | 8 ------
.../src/test/resources/eml/recursiveMail.json | 32 ----------------------
5 files changed, 18 insertions(+), 59 deletions(-)
diff --git a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java
index 6f94526..e283267 100644
--- a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java
+++ b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/MimePart.java
@@ -303,7 +303,8 @@ public class MimePart {
@JsonIgnore
public Stream<MimePart> getAttachmentsStream() {
return attachments.stream()
- .flatMap((mimePart) -> Stream.concat(Stream.of(mimePart), mimePart.getAttachmentsStream()));
+ .flatMap(mimePart -> Stream.concat(Stream.of(mimePart), mimePart.getAttachmentsStream()))
+ .filter(mimePart -> mimePart.contentDisposition.isPresent());
}
}
diff --git a/mailbox/store/src/test/java/org/apache/james/mailbox/store/search/AbstractMessageSearchIndexTest.java b/mailbox/store/src/test/java/org/apache/james/mailbox/store/search/AbstractMessageSearchIndexTest.java
index b0500ef..9492958 100644
--- a/mailbox/store/src/test/java/org/apache/james/mailbox/store/search/AbstractMessageSearchIndexTest.java
+++ b/mailbox/store/src/test/java/org/apache/james/mailbox/store/search/AbstractMessageSearchIndexTest.java
@@ -1273,6 +1273,21 @@ public abstract class AbstractMessageSearchIndexTest {
}
@Test
+ void searchWithTextAttachmentShouldNotMatchMessageBody() throws Exception {
+ assumeTrue(storeMailboxManager.getSupportedSearchCapabilities().contains(MailboxManager.SearchCapabilities.Attachment));
+ myFolderMessageManager.appendMessage(
+ MessageManager.AppendCommand.builder()
+ .build(ClassLoader.getSystemResourceAsStream("eml/emailWithTextAttachment.eml")),
+ session).getId();
+ await();
+
+ SearchQuery searchQuery = SearchQuery.of(SearchQuery.attachmentContains("message"));
+
+ assertThat(messageSearchIndex.search(session, mailbox2, searchQuery).toStream())
+ .isEmpty();
+ }
+
+ @Test
void searchWithPDFAttachmentShouldReturnMailsWhenAttachmentContentMatches() throws Exception {
assumeTrue(storeMailboxManager.getSupportedSearchCapabilities().contains(MailboxManager.SearchCapabilities.Attachment));
byte[] attachmentContent = ClassLoaderUtils.getSystemResourceAsByteArray("eml/attachment.pdf");
diff --git a/mailbox/store/src/test/resources/eml/htmlMail.json b/mailbox/store/src/test/resources/eml/htmlMail.json
index 45445d2..85bb47a 100644
--- a/mailbox/store/src/test/resources/eml/htmlMail.json
+++ b/mailbox/store/src/test/resources/eml/htmlMail.json
@@ -115,24 +115,7 @@
"Text and Html not similar"
],
"sentDate":"2015-06-04T07:36:08+0000",
- "attachments":[
- {
- "textContent":"The text/plain part is not matching the html one.\r\n",
- "mediaType":"text",
- "subtype":"plain",
- "fileName":null,
- "fileExtension":null,
- "contentDisposition":null
- },
- {
- "textContent":"<html>\r\n <head>\r\n\r\n <meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n </head>\r\n <body bgcolor=\"#FFFFFF\" text=\"#000000\">\r\n <i>This is a mail with <b>beautifull</b> html content which contains a banana.</i><br>\r\n </body>\r\n</html>\r\n",
- "mediaType":"text",
- "subtype":"html",
- "fileName":null,
- "fileExtension":null,
- "contentDisposition":null
- }
- ],
+ "attachments":[],
"textBody": "The text/plain part is not matching the html one.\r\n",
"htmlBody": "<html>\r\n <head>\r\n\r\n <meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\r\n </head>\r\n <body bgcolor=\"#FFFFFF\" text=\"#000000\">\r\n <i>This is a mail with <b>beautifull</b> html content which contains a banana.</i><br>\r\n </body>\r\n</html>\r\n",
"isDeleted":true,
diff --git a/mailbox/store/src/test/resources/eml/nonTextual.json b/mailbox/store/src/test/resources/eml/nonTextual.json
index 121434e..5fc73cf 100644
--- a/mailbox/store/src/test/resources/eml/nonTextual.json
+++ b/mailbox/store/src/test/resources/eml/nonTextual.json
@@ -1,14 +1,6 @@
{
"attachments": [
{
- "mediaType":"text",
- "subtype":"plain",
- "fileName":null,
- "fileExtension":null,
- "contentDisposition":null,
- "textContent":"This mail have a non textual attachment !\r\n"
- },
- {
"mediaType":"application",
"subtype":"vnd.oasis.opendocument.text",
"fileName":"toto.odt","fileExtension":"odt",
diff --git a/mailbox/store/src/test/resources/eml/recursiveMail.json b/mailbox/store/src/test/resources/eml/recursiveMail.json
index e59b266..73121e0 100644
--- a/mailbox/store/src/test/resources/eml/recursiveMail.json
+++ b/mailbox/store/src/test/resources/eml/recursiveMail.json
@@ -78,38 +78,6 @@
"sentDate": "2015-06-10T10:45:27+02:00",
"attachments": [
{
- "textContent": "Forward as attachment !\n\n\n",
- "mediaType": "text",
- "subtype": "plain",
- "fileName": null,
- "fileExtension": null,
- "contentDisposition": null
- },
- {
- "textContent": null,
- "mediaType": null,
- "subtype": null,
- "fileName": null,
- "fileExtension": null,
- "contentDisposition": null
- },
- {
- "textContent": null,
- "mediaType": null,
- "subtype": null,
- "fileName": null,
- "fileExtension": null,
- "contentDisposition": null
- },
- {
- "textContent": "Textual content of one attachment ( mail signature )\n",
- "mediaType": "text",
- "subtype": "plain",
- "fileName": null,
- "fileExtension": null,
- "contentDisposition": null
- },
- {
"textContent": null,
"mediaType": "application",
"subtype": "vnd.oasis.opendocument.spreadsheet",
---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscribe@james.apache.org
For additional commands, e-mail: notifications-help@james.apache.org