You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@james.apache.org by rc...@apache.org on 2023/04/19 07:58:01 UTC
[james-project] branch master updated: JAMES-3901 OpenSearch indexing should tolerate bad URL encoding for C… (#1527)
This is an automated email from the ASF dual-hosted git repository.
rcordier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git
The following commit(s) were added to refs/heads/master by this push:
new 285f959cd5 JAMES-3901 OpenSearch indexing should tolerate bad URL encoding for C… (#1527)
285f959cd5 is described below
commit 285f959cd5d01d8b8aae7c48a822764f469808a9
Author: Benoit TELLIER <bt...@linagora.com>
AuthorDate: Wed Apr 19 14:57:55 2023 +0700
JAMES-3901 OpenSearch indexing should tolerate bad URL encoding for C… (#1527)
---
.../mailbox/opensearch/json/MimePartParser.java | 24 ++++---
.../json/MessageToOpenSearchJsonTest.java | 22 +++++++
.../store/src/test/resources/eml/james-3901.eml | 73 ++++++++++++++++++++++
.../store/src/test/resources/eml/james-3901.json | 47 ++++++++++++++
4 files changed, 156 insertions(+), 10 deletions(-)
diff --git a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/MimePartParser.java b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/MimePartParser.java
index bf4ebe10d4..779e0ce02b 100644
--- a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/MimePartParser.java
+++ b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/MimePartParser.java
@@ -122,17 +122,21 @@ public class MimePartParser {
}
private void extractMimePartBodyDescription(MimeTokenStream stream) {
- MaximalBodyDescriptor descriptor = (MaximalBodyDescriptor) stream.getBodyDescriptor();
+ try {
+ MaximalBodyDescriptor descriptor = (MaximalBodyDescriptor) stream.getBodyDescriptor();
- Optional.ofNullable(descriptor.getMediaType())
- .map(MediaType::of)
- .ifPresent(currentlyBuildMimePart::addMediaType);
- Optional.ofNullable(descriptor.getSubType())
- .map(SubType::of)
- .ifPresent(currentlyBuildMimePart::addSubType);
- currentlyBuildMimePart.addContentDisposition(descriptor.getContentDispositionType())
- .addFileName(descriptor.getContentDispositionFilename());
- extractCharset(descriptor);
+ Optional.ofNullable(descriptor.getMediaType())
+ .map(MediaType::of)
+ .ifPresent(currentlyBuildMimePart::addMediaType);
+ Optional.ofNullable(descriptor.getSubType())
+ .map(SubType::of)
+ .ifPresent(currentlyBuildMimePart::addSubType);
+ currentlyBuildMimePart.addContentDisposition(descriptor.getContentDispositionType())
+ .addFileName(descriptor.getContentDispositionFilename());
+ extractCharset(descriptor);
+ } catch (Exception e) {
+ LOGGER.warn("Failed to extract mime body part description", e);
+ }
}
private void extractCharset(MaximalBodyDescriptor descriptor) {
diff --git a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/json/MessageToOpenSearchJsonTest.java b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/json/MessageToOpenSearchJsonTest.java
index ba3af351c0..ab2b55fe0d 100644
--- a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/json/MessageToOpenSearchJsonTest.java
+++ b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/json/MessageToOpenSearchJsonTest.java
@@ -113,6 +113,28 @@ class MessageToOpenSearchJsonTest {
.isEqualTo(ClassLoaderUtils.getSystemResourceAsString("eml/spamMail.json"));
}
+ @Test
+ void badContentDescriptionShouldStillBeIndexed() throws IOException {
+ MessageToOpenSearchJson messageToOpenSearchJson = new MessageToOpenSearchJson(
+ new DefaultTextExtractor(),
+ ZoneId.of("Europe/Paris"), IndexAttachments.YES, IndexHeaders.YES);
+ MailboxMessage spamMail = new SimpleMailboxMessage(MESSAGE_ID,
+ THREAD_ID,
+ date,
+ SIZE,
+ BODY_START_OCTET,
+ new ByteContent(IOUtils.toByteArray(ClassLoaderUtils.getSystemResourceAsSharedStream("eml/james-3901.eml"))),
+ new Flags(),
+ propertyBuilder.build(),
+ MAILBOX_ID);
+ spamMail.setUid(UID);
+ spamMail.setModSeq(MOD_SEQ);
+
+ assertThatJson(messageToOpenSearchJson.convertToJson(spamMail).block())
+ .when(IGNORING_ARRAY_ORDER)
+ .isEqualTo(ClassLoaderUtils.getSystemResourceAsString("eml/james-3901.json"));
+ }
+
@Test
void spamEmailShouldBeWellConvertedToJsonWhenNoHeaders() throws IOException {
MessageToOpenSearchJson messageToOpenSearchJson = new MessageToOpenSearchJson(
diff --git a/mailbox/store/src/test/resources/eml/james-3901.eml b/mailbox/store/src/test/resources/eml/james-3901.eml
new file mode 100644
index 0000000000..936b85a485
--- /dev/null
+++ b/mailbox/store/src/test/resources/eml/james-3901.eml
@@ -0,0 +1,73 @@
+Return-Path: <fi...@upn.integration-open-paas.org>
+Received: from 10.2.0.0 (EHLO 617) ([10.2.0.0])
+ by smtp.upn.integration-open-paas.org (JAMES SMTP Server ) with ESMTP ID -489272706
+ for <fi...@upn.integration-open-paas.org>;
+ Sat, 28 Nov 2020 10:49:24 +0000 (GMT)
+Date: Wed, 4 Apr 2001 13:19:00 -0700 (PDT),Wed, 4 Apr 2001 13:19:00 -0700 (PDT)
+Message-ID: <IY...@zlsvr22>
+from: "Drew Fossum"
+to: "Mary Kay Miller"
+subject: Revised Draft
+filename: dfossum.nsf
+folder: \Drew_Fossum_Dec2000_June2001_2\Notes Folders\Sent
+date: Wed, 4 Apr 2001 13:19:00 -0700 (PDT),Wed, 4 Apr 2001 13:19:00 -0700 (PDT)
+Status: RO
+Cc:
+X-libpst-forensic-sender: Drew Fossum
+X-libpst-forensic-bcc:
+MIME-Version: 1.0
+Content-Type: multipart/mixed;
+ boundary="--boundary-LibPST-iamunique-1722682679_-_-"
+
+
+----boundary-LibPST-iamunique-1722682679_-_-
+Content-Type: text/plain; charset="us-ascii"
+
+---------------------- Forwarded by Drew Fossum/ET&S/Enron on 04/04/2001
+01:19 PM ---------------------------
+
+
+"Hirasuna, Robert" <rh...@AkinGump.com> on 04/04/2001 10:18:00 AM
+To: "Drew Fossum (E-mail)" <df...@enron.com>
+cc:
+
+Subject: Revised Draft
+
+
+ <<4%P001!.DOC>> Use this draft instead. I missed a couple of delted dashes
+on the first page.
+
+The information contained in this e-mail message is intended only for the
+personal and confidential use of the recipient(s) named above. This message
+may be an attorney-client communication and/or work product and as such is
+privileged and confidential. If the reader of this message is not the
+intended recipient or an agent responsible for delivering it to the intended
+recipient, you are hereby notified that you have received this document in
+error and that any review, dissemination, distribution, or copying of this
+message is strictly prohibited. If you have received this communication in
+error, please notify us immediately by e-mail, and delete the original
+message.
+
+
+ - 4%P001!.DOC
+
+
+***********
+EDRM Enron Email Data Set has been produced in EML, PST and NSF format by ZL Technologies, Inc. This Data Set is licensed under a Creative Commons Attribution 3.0 United States License <http://creativecommons.org/licenses/by/3.0/us/> . To provide attribution, please cite to "ZL Technologies, Inc. (http://www.zlti.com)."
+***********
+
+----boundary-LibPST-iamunique-1722682679_-_-
+Content-Type: application/octet-stream
+Content-Transfer-Encoding: base64
+Content-Disposition: attachment;
+ filename*=utf-8''4%P001!.DOC;
+ filename="4%P001!.DOC"
+
+QXR0YWNobWVudCBDOlxFbnJvbiBEYXRhXGF0dGFjaFxERk9TU1VNREVDMjAwMEpVTkUyMDAxMlww
+MDAwMDAwMDk0NUU0RUI2RUJENkQ2MTE4MjYzMDAwNjVCNUU4RDMyMjREMDIyMDAuIzEuNCVQMDAx
+IS5ET0Mgbm90IGZvdW5kIQ==
+
+
+----boundary-LibPST-iamunique-1722682679_-_---
+
+
diff --git a/mailbox/store/src/test/resources/eml/james-3901.json b/mailbox/store/src/test/resources/eml/james-3901.json
new file mode 100644
index 0000000000..bc81ce5978
--- /dev/null
+++ b/mailbox/store/src/test/resources/eml/james-3901.json
@@ -0,0 +1,47 @@
+{"attachments":[],
+ "bcc":[],
+ "htmlBody":null,
+ "textBody":"---------------------- Forwarded by Drew Fossum/ET&S/Enron on 04/04/2001 \n01:19 PM ---------------------------\n\n\n\"Hirasuna, Robert\" <rh...@AkinGump.com> on 04/04/2001 10:18:00 AM\nTo: \"Drew Fossum (E-mail)\" <df...@enron.com>\ncc: \n\nSubject: Revised Draft\n\n\n <<4%P001!.DOC>> Use this draft instead. I missed a couple of delted dashes \non the first page.\n\nThe information contained in this e-mail message is intended only for the \npersonal and confidentia [...]
+ "cc":[],
+ "date":"2015-06-07T00:00:00+0200",
+ "from":[{"name":null,"address":"Drew Fossum","domain":null}],
+ "hasAttachment":false,
+ "headers":[
+ {"name":"return-path","value":"<fi...@upn.integration-open-paas.org>"},
+ {"name":"received","value":"from 10.2.0.0 (EHLO 617) ([10.2.0.0]) by smtp.upn.integration-open-paas.org (JAMES SMTP Server ) with ESMTP ID -489272706 for <fi...@upn.integration-open-paas.org>; Sat, 28 Nov 2020 10:49:24 +0000 (GMT)"},
+ {"name":"date","value":"Wed, 4 Apr 2001 13:19:00 -0700 (PDT),Wed, 4 Apr 2001 13:19:00 -0700 (PDT)"},
+ {"name":"message-id","value":"<IY...@zlsvr22>"},
+ {"name":"from","value":"\"Drew Fossum\""},
+ {"name":"to","value":"\"Mary Kay Miller\""},
+ {"name":"subject","value":"Revised Draft"},
+ {"name":"filename","value":"dfossum.nsf"},
+ {"name":"folder","value":"\\Drew_Fossum_Dec2000_June2001_2\\Notes Folders\\Sent"},
+ {"name":"date","value":"Wed, 4 Apr 2001 13:19:00 -0700 (PDT),Wed, 4 Apr 2001 13:19:00 -0700 (PDT)"},
+ {"name":"status","value":"RO"},
+ {"name":"cc","value":" "},
+ {"name":"x-libpst-forensic-sender", "value":"Drew Fossum"},
+ {"name":"x-libpst-forensic-bcc","value":" "},
+ {"name":"mime-version","value":"1.0"},
+ {"name":"content-type","value":"multipart/mixed;\tboundary=\"--boundary-LibPST-iamunique-1722682679_-_-\""}
+ ],
+ "mailboxId":"18",
+ "mediaType":"plain",
+ "messageId":"184",
+ "threadId":"184",
+ "modSeq":42,
+ "sentDate":"2001-04-04T13:19:00-0700",
+ "saveDate":null,
+ "size":25,
+ "subject":["Revised Draft"],
+ "subtype":"text",
+ "to":[{"name":null,"address":"Mary Kay Miller","domain":null}],
+ "uid":25,
+ "userFlags":[],
+ "mimeMessageID":"<IY...@zlsvr22>",
+ "isAnswered":false,
+ "isDeleted":false,
+ "isDraft":false,
+ "isFlagged":false,
+ "isRecent":false,
+ "isUnread":true
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscribe@james.apache.org
For additional commands, e-mail: notifications-help@james.apache.org