You are viewing a plain text version of this content. The canonical link for it is here.
Posted to server-dev@james.apache.org by bt...@apache.org on 2015/06/29 10:39:30 UTC
svn commit: r1688139 - in /james/mailbox/trunk/elasticsearch/src:
main/java/org/apache/james/mailbox/elasticsearch/json/
test/java/org/apache/james/mailbox/elasticsearch/json/
test/resources/documents/
Author: btellier
Date: Mon Jun 29 08:39:30 2015
New Revision: 1688139
URL: http://svn.apache.org/r1688139
Log:
MAILBOX-234 Dates extraction from headers
Modified:
james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/HeaderCollection.java
james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/HeaderCollectionTest.java
james/mailbox/trunk/elasticsearch/src/test/resources/documents/mail1.eml
james/mailbox/trunk/elasticsearch/src/test/resources/documents/mail2.eml
james/mailbox/trunk/elasticsearch/src/test/resources/documents/mail3.eml
james/mailbox/trunk/elasticsearch/src/test/resources/documents/mail4.eml
james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.eml
james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.json
Modified: james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/HeaderCollection.java
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/HeaderCollection.java?rev=1688139&r1=1688138&r2=1688139&view=diff
==============================================================================
--- james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/HeaderCollection.java (original)
+++ james/mailbox/trunk/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/HeaderCollection.java Mon Jun 29 08:39:30 2015
@@ -19,6 +19,7 @@
package org.apache.james.mailbox.elasticsearch.json;
+import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableMultimap;
@@ -39,6 +40,8 @@ import java.time.format.DateTimeFormatte
import java.util.HashSet;
import java.util.Optional;
import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@@ -46,6 +49,14 @@ public class HeaderCollection {
public static class Builder {
+ // Some sent e-mails carry a Date header of this form: Wed, 3 Jun 2015 09:05:46 +0000 (UTC)
+ // The Java 8 Time library's RFC_1123_DATE_TIME only corresponds to: Wed, 3 Jun 2015 09:05:46 +0000
+ // This regexp matches (in order to remove) the possibly invalid parenthesised end of a header date
+ // Example of matching patterns :
+ // (UTC)
+ // (CEST)
+ private static final Pattern DATE_SANITIZING_PATTERN = Pattern.compile(" *\\(.*\\) *");
+
private final Set<EMailer> toAddressSet;
private final Set<EMailer> fromAddressSet;
private final Set<EMailer> ccAddressSet;
@@ -135,13 +146,26 @@ public class HeaderCollection {
private Optional<ZonedDateTime> toISODate(String value) {
try {
- return Optional.of(ZonedDateTime.parse(value, DateTimeFormatter.RFC_1123_DATE_TIME));
+ return Optional.of(ZonedDateTime.parse(
+ sanitizeDateStringHeaderValue(value),
+ DateTimeFormatter.RFC_1123_DATE_TIME));
} catch (Exception e) {
LOGGER.info("Can not parse receive date " + value);
return Optional.empty();
}
}
+ @VisibleForTesting String sanitizeDateStringHeaderValue(String value) {
+ // Some sent e-mails carry a Date header of this form: Wed, 3 Jun 2015 09:05:46 +0000 (UTC)
+ // The Java 8 Time library's RFC_1123_DATE_TIME only corresponds to: Wed, 3 Jun 2015 09:05:46 +0000
+ // This method converts the former form into one parsable by the RFC_1123_DATE_TIME DateTimeFormatter
+ Matcher sanitizerMatcher = DATE_SANITIZING_PATTERN.matcher(value);
+ if (sanitizerMatcher.find()) {
+ return value.substring(0 , sanitizerMatcher.start());
+ }
+ return value;
+ }
+
}
public static final String TO = "to";
Modified: james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/HeaderCollectionTest.java
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/HeaderCollectionTest.java?rev=1688139&r1=1688138&r2=1688139&view=diff
==============================================================================
--- james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/HeaderCollectionTest.java (original)
+++ james/mailbox/trunk/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/HeaderCollectionTest.java Mon Jun 29 08:39:30 2015
@@ -130,6 +130,13 @@ public class HeaderCollectionTest {
}
@Test
+ public void nonStandardDatesShouldBeRetreived() {
+ HeaderCollection headerCollection = HeaderCollection.builder().add(new FieldImpl("Date", "Thu, 4 Jun 2015 06:08:41 +0200 (UTC)")).build();
+ assertThat(DATE_TIME_FORMATTER.format(headerCollection.getSentDate().get()))
+ .isEqualTo("2015/06/04 06:08:41");
+ }
+
+ @Test
public void dateShouldBeAbsentOnInvalidHeader() {
HeaderCollection headerCollection = HeaderCollection.builder().add(new FieldImpl("Date", "Not a date")).build();
assertThat(headerCollection.getSentDate().isPresent())
@@ -148,4 +155,32 @@ public class HeaderCollectionTest {
HeaderCollection.builder().add(null).build();
}
+ @Test
+ public void sanitizeDateStringHeaderValueShouldRemoveCESTPart() {
+ assertThat(HeaderCollection.builder()
+ .sanitizeDateStringHeaderValue("Thu, 18 Jun 2015 04:09:35 +0200 (CEST)"))
+ .isEqualTo("Thu, 18 Jun 2015 04:09:35 +0200");
+ }
+
+ @Test
+ public void sanitizeDateStringHeaderValueShouldRemoveUTCPart() {
+ assertThat(HeaderCollection.builder()
+ .sanitizeDateStringHeaderValue("Thu, 18 Jun 2015 04:09:35 +0200 (UTC) "))
+ .isEqualTo("Thu, 18 Jun 2015 04:09:35 +0200");
+ }
+
+ @Test
+ public void sanitizeDateStringHeaderValueShouldNotChangeAcceptableString() {
+ assertThat(HeaderCollection.builder()
+ .sanitizeDateStringHeaderValue("Thu, 18 Jun 2015 04:09:35 +0200"))
+ .isEqualTo("Thu, 18 Jun 2015 04:09:35 +0200");
+ }
+
+ @Test
+ public void sanitizeDateStringHeaderValueShouldNotChangeEmptyString() {
+ assertThat(HeaderCollection.builder()
+ .sanitizeDateStringHeaderValue(""))
+ .isEqualTo("");
+ }
+
}
Modified: james/mailbox/trunk/elasticsearch/src/test/resources/documents/mail1.eml
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/mail1.eml?rev=1688139&r1=1688138&r2=1688139&view=diff
==============================================================================
--- james/mailbox/trunk/elasticsearch/src/test/resources/documents/mail1.eml (original)
+++ james/mailbox/trunk/elasticsearch/src/test/resources/documents/mail1.eml Mon Jun 29 08:39:30 2015
@@ -32,7 +32,7 @@ Delivered-To: mailing list server-dev@ja
Received: (qmail 37236 invoked by uid 99); 4 Jun 2015 09:23:38 -0000
Received: from arcas.apache.org (HELO arcas.apache.org) (140.211.11.28)
by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 04 Jun 2015 09:23:38 +0000
-Date: Thu, 4 Jun 2015 09:23:37 +0000
+Date: Thu, 4 Jun 2015 09:23:37 +0000 (UTC)
From: "Tellier Benoit (JIRA)" <ji...@apache.org>
To: server-dev@james.apache.org
Message-ID: <JI...@Atlassian.JIRA>
Modified: james/mailbox/trunk/elasticsearch/src/test/resources/documents/mail2.eml
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/mail2.eml?rev=1688139&r1=1688138&r2=1688139&view=diff
==============================================================================
--- james/mailbox/trunk/elasticsearch/src/test/resources/documents/mail2.eml (original)
+++ james/mailbox/trunk/elasticsearch/src/test/resources/documents/mail2.eml Mon Jun 29 08:39:30 2015
@@ -32,7 +32,7 @@ Delivered-To: mailing list server-dev@ja
Received: (qmail 43130 invoked by uid 99); 4 Jun 2015 09:27:38 -0000
Received: from arcas.apache.org (HELO arcas.apache.org) (140.211.11.28)
by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 04 Jun 2015 09:27:38 +0000
-Date: Thu, 4 Jun 2015 09:27:37 +0000
+Date: Thu, 4 Jun 2015 09:27:37 +0000 (UTC)
From: "Tellier Benoit (JIRA)" <ji...@apache.org>
To: server-dev@james.apache.org
Message-ID: <JI...@Atlassian.JIRA>
Modified: james/mailbox/trunk/elasticsearch/src/test/resources/documents/mail3.eml
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/mail3.eml?rev=1688139&r1=1688138&r2=1688139&view=diff
==============================================================================
--- james/mailbox/trunk/elasticsearch/src/test/resources/documents/mail3.eml (original)
+++ james/mailbox/trunk/elasticsearch/src/test/resources/documents/mail3.eml Mon Jun 29 08:39:30 2015
@@ -34,7 +34,7 @@ Delivered-To: mailing list server-dev@ja
Received: (qmail 1132 invoked by uid 99); 2 Jun 2015 08:16:20 -0000
Received: from arcas.apache.org (HELO arcas.apache.org) (140.211.11.28)
by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 02 Jun 2015 08:16:20 +0000
-Date: Tue, 2 Jun 2015 08:16:19 +0000
+Date: Tue, 2 Jun 2015 08:16:19 +0000 (UTC)
From: "Eric Charles (JIRA)" <ji...@apache.org>
To: server-dev@james.apache.org
Message-ID: <JI...@Atlassian.JIRA>
Modified: james/mailbox/trunk/elasticsearch/src/test/resources/documents/mail4.eml
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/mail4.eml?rev=1688139&r1=1688138&r2=1688139&view=diff
==============================================================================
--- james/mailbox/trunk/elasticsearch/src/test/resources/documents/mail4.eml (original)
+++ james/mailbox/trunk/elasticsearch/src/test/resources/documents/mail4.eml Mon Jun 29 08:39:30 2015
@@ -34,7 +34,7 @@ Delivered-To: mailing list mailet-api@ja
Received: (qmail 81730 invoked by uid 99); 15 May 2015 06:36:00 -0000
Received: from arcas.apache.org (HELO arcas.apache.org) (140.211.11.28)
by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 15 May 2015 06:36:00 +0000
-Date: Fri, 15 May 2015 06:35:59 +0000
+Date: Fri, 15 May 2015 06:35:59 +0000 (UTC)
From: "Eric Charles (JIRA)" <ma...@james.apache.org>
To: mailet-api@james.apache.org
Message-ID: <JI...@Atlassian.JIRA>
Modified: james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.eml
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.eml?rev=1688139&r1=1688138&r2=1688139&view=diff
==============================================================================
--- james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.eml (original)
+++ james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.eml Mon Jun 29 08:39:30 2015
@@ -31,7 +31,7 @@ Content-Type: multipart/mixed; boundary=
Content-Transfer-Encoding: 7bit
MIME-Version: 1.0
From: "Content-filter at spam.minet.net" <po...@minet.net>
-Date: Wed, 3 Jun 2015 09:05:46 +0000
+Date: Wed, 3 Jun 2015 09:05:46 +0000 (UTC)
To: <ro...@listes.minet.net>
Message-ID: <VA...@spam.minet.net>
Subject: [root] UNCHECKED contents in mail FROM <qu...@riseup.net>
Modified: james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.json
URL: http://svn.apache.org/viewvc/james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.json?rev=1688139&r1=1688138&r2=1688139&view=diff
==============================================================================
--- james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.json (original)
+++ james/mailbox/trunk/elasticsearch/src/test/resources/documents/spamMail.json Mon Jun 29 08:39:30 2015
@@ -12,7 +12,7 @@
"1.0"
],
"date": [
- "Wed, 3 Jun 2015 09:05:46 +0000"
+ "Wed, 3 Jun 2015 09:05:46 +0000 (UTC)"
],
"x-beenthere": [
"root@listes.minet.net"
@@ -103,7 +103,7 @@
"subject": [
"[root] UNCHECKED contents in mail FROM <qu...@riseup.net>"
],
- "sentDate": "2015-06-07T00:00:00+0200",
+ "sentDate": "2015-06-03T09:05:46+0000",
"properties": [
{
"namespace": "http://james.apache.org/rfc2045/Content-Type",
---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscribe@james.apache.org
For additional commands, e-mail: server-dev-help@james.apache.org