You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@james.apache.org by rc...@apache.org on 2023/04/19 07:58:01 UTC

[james-project] branch master updated: JAMES-3901 OpenSearch indexing should tolerate bad URL encoding for C… (#1527)

This is an automated email from the ASF dual-hosted git repository.

rcordier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git


The following commit(s) were added to refs/heads/master by this push:
     new 285f959cd5 JAMES-3901 OpenSearch indexing should tolerate bad URL encoding for C… (#1527)
285f959cd5 is described below

commit 285f959cd5d01d8b8aae7c48a822764f469808a9
Author: Benoit TELLIER <bt...@linagora.com>
AuthorDate: Wed Apr 19 14:57:55 2023 +0700

    JAMES-3901 OpenSearch indexing should tolerate bad URL encoding for C… (#1527)
---
 .../mailbox/opensearch/json/MimePartParser.java    | 24 ++++---
 .../json/MessageToOpenSearchJsonTest.java          | 22 +++++++
 .../store/src/test/resources/eml/james-3901.eml    | 73 ++++++++++++++++++++++
 .../store/src/test/resources/eml/james-3901.json   | 47 ++++++++++++++
 4 files changed, 156 insertions(+), 10 deletions(-)

diff --git a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/MimePartParser.java b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/MimePartParser.java
index bf4ebe10d4..779e0ce02b 100644
--- a/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/MimePartParser.java
+++ b/mailbox/opensearch/src/main/java/org/apache/james/mailbox/opensearch/json/MimePartParser.java
@@ -122,17 +122,21 @@ public class MimePartParser {
     }
 
     private void extractMimePartBodyDescription(MimeTokenStream stream) {
-        MaximalBodyDescriptor descriptor = (MaximalBodyDescriptor) stream.getBodyDescriptor();
+        try {
+            MaximalBodyDescriptor descriptor = (MaximalBodyDescriptor) stream.getBodyDescriptor();
 
-        Optional.ofNullable(descriptor.getMediaType())
-            .map(MediaType::of)
-            .ifPresent(currentlyBuildMimePart::addMediaType);
-        Optional.ofNullable(descriptor.getSubType())
-            .map(SubType::of)
-            .ifPresent(currentlyBuildMimePart::addSubType);
-        currentlyBuildMimePart.addContentDisposition(descriptor.getContentDispositionType())
-            .addFileName(descriptor.getContentDispositionFilename());
-        extractCharset(descriptor);
+            Optional.ofNullable(descriptor.getMediaType())
+                .map(MediaType::of)
+                .ifPresent(currentlyBuildMimePart::addMediaType);
+            Optional.ofNullable(descriptor.getSubType())
+                .map(SubType::of)
+                .ifPresent(currentlyBuildMimePart::addSubType);
+            currentlyBuildMimePart.addContentDisposition(descriptor.getContentDispositionType())
+                .addFileName(descriptor.getContentDispositionFilename());
+            extractCharset(descriptor);
+        } catch (Exception e) {
+            LOGGER.warn("Failed to extract mime body part description", e);
+        }
     }
 
     private void extractCharset(MaximalBodyDescriptor descriptor) {
diff --git a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/json/MessageToOpenSearchJsonTest.java b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/json/MessageToOpenSearchJsonTest.java
index ba3af351c0..ab2b55fe0d 100644
--- a/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/json/MessageToOpenSearchJsonTest.java
+++ b/mailbox/opensearch/src/test/java/org/apache/james/mailbox/opensearch/json/MessageToOpenSearchJsonTest.java
@@ -113,6 +113,28 @@ class MessageToOpenSearchJsonTest {
             .isEqualTo(ClassLoaderUtils.getSystemResourceAsString("eml/spamMail.json"));
     }
 
+    @Test
+    void badContentDescriptionShouldStillBeIndexed() throws IOException {
+        MessageToOpenSearchJson messageToOpenSearchJson = new MessageToOpenSearchJson(
+            new DefaultTextExtractor(),
+            ZoneId.of("Europe/Paris"), IndexAttachments.YES, IndexHeaders.YES);
+        MailboxMessage spamMail = new SimpleMailboxMessage(MESSAGE_ID,
+                THREAD_ID,
+                date,
+                SIZE,
+                BODY_START_OCTET,
+                new ByteContent(IOUtils.toByteArray(ClassLoaderUtils.getSystemResourceAsSharedStream("eml/james-3901.eml"))),
+                new Flags(),
+                propertyBuilder.build(),
+                MAILBOX_ID);
+        spamMail.setUid(UID);
+        spamMail.setModSeq(MOD_SEQ);
+
+        assertThatJson(messageToOpenSearchJson.convertToJson(spamMail).block())
+            .when(IGNORING_ARRAY_ORDER)
+            .isEqualTo(ClassLoaderUtils.getSystemResourceAsString("eml/james-3901.json"));
+    }
+
     @Test
     void spamEmailShouldBeWellConvertedToJsonWhenNoHeaders() throws IOException {
         MessageToOpenSearchJson messageToOpenSearchJson = new MessageToOpenSearchJson(
diff --git a/mailbox/store/src/test/resources/eml/james-3901.eml b/mailbox/store/src/test/resources/eml/james-3901.eml
new file mode 100644
index 0000000000..936b85a485
--- /dev/null
+++ b/mailbox/store/src/test/resources/eml/james-3901.eml
@@ -0,0 +1,73 @@
+Return-Path: <fi...@upn.integration-open-paas.org>
+Received: from 10.2.0.0 (EHLO 617) ([10.2.0.0])
+          by smtp.upn.integration-open-paas.org (JAMES SMTP Server ) with ESMTP ID -489272706
+          for <fi...@upn.integration-open-paas.org>;
+          Sat, 28 Nov 2020 10:49:24 +0000 (GMT)
+Date: Wed, 4 Apr 2001 13:19:00 -0700 (PDT),Wed, 4 Apr 2001 13:19:00 -0700  (PDT)
+Message-ID: <IY...@zlsvr22>
+from: "Drew Fossum"
+to: "Mary Kay Miller"
+subject: Revised Draft
+filename: dfossum.nsf
+folder: \Drew_Fossum_Dec2000_June2001_2\Notes Folders\Sent
+date: Wed, 4 Apr 2001 13:19:00 -0700 (PDT),Wed, 4 Apr 2001 13:19:00 -0700  (PDT)
+Status: RO
+Cc: 
+X-libpst-forensic-sender: Drew Fossum
+X-libpst-forensic-bcc: 
+MIME-Version: 1.0
+Content-Type: multipart/mixed;
+	boundary="--boundary-LibPST-iamunique-1722682679_-_-"
+
+
+----boundary-LibPST-iamunique-1722682679_-_-
+Content-Type: text/plain; charset="us-ascii"
+
+---------------------- Forwarded by Drew Fossum/ET&S/Enron on 04/04/2001 
+01:19 PM ---------------------------
+
+
+"Hirasuna, Robert" <rh...@AkinGump.com> on 04/04/2001 10:18:00 AM
+To: "Drew Fossum (E-mail)" <df...@enron.com>
+cc:  
+
+Subject: Revised Draft
+
+
+ <<4%P001!.DOC>> Use this draft instead.  I missed a couple of delted dashes 
+on the first page.
+
+The information contained in this e-mail message is intended only for the 
+personal and confidential use of the recipient(s) named above. This message 
+may be an attorney-client communication and/or work product and as such is 
+privileged and confidential. If the reader of this message is not the 
+intended recipient or an agent responsible for delivering it to the intended 
+recipient, you are hereby notified that you have received this document in 
+error and that any review, dissemination, distribution, or copying of this 
+message is strictly prohibited. If you have received this communication in 
+error, please notify us immediately by e-mail, and delete the original 
+message.
+
+
+ - 4%P001!.DOC
+
+
+***********
+EDRM Enron Email Data Set has been produced in EML, PST and NSF format by ZL Technologies, Inc. This Data Set is licensed under a Creative Commons Attribution 3.0 United States License <http://creativecommons.org/licenses/by/3.0/us/> . To provide attribution, please cite to "ZL Technologies, Inc. (http://www.zlti.com)."
+***********
+
+----boundary-LibPST-iamunique-1722682679_-_-
+Content-Type: application/octet-stream
+Content-Transfer-Encoding: base64
+Content-Disposition: attachment; 
+        filename*=utf-8''4%P001!.DOC;
+        filename="4%P001!.DOC"
+
+QXR0YWNobWVudCBDOlxFbnJvbiBEYXRhXGF0dGFjaFxERk9TU1VNREVDMjAwMEpVTkUyMDAxMlww
+MDAwMDAwMDk0NUU0RUI2RUJENkQ2MTE4MjYzMDAwNjVCNUU4RDMyMjREMDIyMDAuIzEuNCVQMDAx
+IS5ET0Mgbm90IGZvdW5kIQ==
+
+
+----boundary-LibPST-iamunique-1722682679_-_---
+
+
diff --git a/mailbox/store/src/test/resources/eml/james-3901.json b/mailbox/store/src/test/resources/eml/james-3901.json
new file mode 100644
index 0000000000..bc81ce5978
--- /dev/null
+++ b/mailbox/store/src/test/resources/eml/james-3901.json
@@ -0,0 +1,47 @@
+{"attachments":[],
+  "bcc":[],
+  "htmlBody":null,
+  "textBody":"---------------------- Forwarded by Drew Fossum/ET&S/Enron on 04/04/2001 \n01:19 PM ---------------------------\n\n\n\"Hirasuna, Robert\" <rh...@AkinGump.com> on 04/04/2001 10:18:00 AM\nTo: \"Drew Fossum (E-mail)\" <df...@enron.com>\ncc:  \n\nSubject: Revised Draft\n\n\n <<4%P001!.DOC>> Use this draft instead.  I missed a couple of delted dashes \non the first page.\n\nThe information contained in this e-mail message is intended only for the \npersonal and confidentia [...]
+  "cc":[],
+  "date":"2015-06-07T00:00:00+0200",
+  "from":[{"name":null,"address":"Drew Fossum","domain":null}],
+  "hasAttachment":false,
+  "headers":[
+    {"name":"return-path","value":"<fi...@upn.integration-open-paas.org>"},
+    {"name":"received","value":"from 10.2.0.0 (EHLO 617) ([10.2.0.0])          by smtp.upn.integration-open-paas.org (JAMES SMTP Server ) with ESMTP ID -489272706          for <fi...@upn.integration-open-paas.org>;          Sat, 28 Nov 2020 10:49:24 +0000 (GMT)"},
+    {"name":"date","value":"Wed, 4 Apr 2001 13:19:00 -0700 (PDT),Wed, 4 Apr 2001 13:19:00 -0700  (PDT)"},
+    {"name":"message-id","value":"<IY...@zlsvr22>"},
+    {"name":"from","value":"\"Drew Fossum\""},
+    {"name":"to","value":"\"Mary Kay Miller\""},
+    {"name":"subject","value":"Revised Draft"},
+    {"name":"filename","value":"dfossum.nsf"},
+    {"name":"folder","value":"\\Drew_Fossum_Dec2000_June2001_2\\Notes Folders\\Sent"},
+    {"name":"date","value":"Wed, 4 Apr 2001 13:19:00 -0700 (PDT),Wed, 4 Apr 2001 13:19:00 -0700  (PDT)"},
+    {"name":"status","value":"RO"},
+    {"name":"cc","value":" "},
+    {"name":"x-libpst-forensic-sender", "value":"Drew Fossum"},
+    {"name":"x-libpst-forensic-bcc","value":" "},
+    {"name":"mime-version","value":"1.0"},
+    {"name":"content-type","value":"multipart/mixed;\tboundary=\"--boundary-LibPST-iamunique-1722682679_-_-\""}
+  ],
+  "mailboxId":"18",
+  "mediaType":"plain",
+  "messageId":"184",
+  "threadId":"184",
+  "modSeq":42,
+  "sentDate":"2001-04-04T13:19:00-0700",
+  "saveDate":null,
+  "size":25,
+  "subject":["Revised Draft"],
+  "subtype":"text",
+  "to":[{"name":null,"address":"Mary Kay Miller","domain":null}],
+  "uid":25,
+  "userFlags":[],
+  "mimeMessageID":"<IY...@zlsvr22>",
+  "isAnswered":false,
+  "isDeleted":false,
+  "isDraft":false,
+  "isFlagged":false,
+  "isRecent":false,
+  "isUnread":true
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscribe@james.apache.org
For additional commands, e-mail: notifications-help@james.apache.org