You are viewing a plain text version of this content. The canonical link for it is here.
Posted to server-dev@james.apache.org by bt...@apache.org on 2017/08/25 08:32:48 UTC
[10/13] james-project git commit: JAMES-2046 Implement robust
SentDate parsing for Memory search
JAMES-2046 Implement robust SentDate parsing for Memory search
Reuse ElasticSearch code.
Project: http://git-wip-us.apache.org/repos/asf/james-project/repo
Commit: http://git-wip-us.apache.org/repos/asf/james-project/commit/21ce9e82
Tree: http://git-wip-us.apache.org/repos/asf/james-project/tree/21ce9e82
Diff: http://git-wip-us.apache.org/repos/asf/james-project/diff/21ce9e82
Branch: refs/heads/master
Commit: 21ce9e82a5effac912841eede6fc54a09d7d0090
Parents: eaedcd2
Author: benwa <bt...@linagora.com>
Authored: Thu Aug 24 13:40:16 2017 +0700
Committer: benwa <bt...@linagora.com>
Committed: Fri Aug 25 15:31:42 2017 +0700
----------------------------------------------------------------------
.../elasticsearch/json/HeaderCollection.java | 42 +----------
.../json/HeaderCollectionTest.java | 28 --------
.../search/comparator/CombinedComparator.java | 13 ++--
.../search/comparator/SentDateComparator.java | 73 +++++++++++++-------
.../comparator/SentDateComparatorTest.java | 68 ++++++++++++++++++
5 files changed, 124 insertions(+), 100 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/james-project/blob/21ce9e82/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/HeaderCollection.java
----------------------------------------------------------------------
diff --git a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/HeaderCollection.java b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/HeaderCollection.java
index 2b26a66..85782ad 100644
--- a/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/HeaderCollection.java
+++ b/mailbox/elasticsearch/src/main/java/org/apache/james/mailbox/elasticsearch/json/HeaderCollection.java
@@ -24,23 +24,18 @@ import java.util.HashSet;
import java.util.Locale;
import java.util.Optional;
import java.util.Set;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.james.mailbox.store.search.SearchUtil;
+import org.apache.james.mailbox.store.search.comparator.SentDateComparator;
import org.apache.james.mime4j.dom.address.Address;
import org.apache.james.mime4j.dom.address.Group;
import org.apache.james.mime4j.dom.address.Mailbox;
import org.apache.james.mime4j.field.address.LenientAddressParser;
import org.apache.james.mime4j.stream.Field;
import org.apache.james.mime4j.util.MimeUtil;
-import org.apache.james.util.date.ImapDateTimeFormatter;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableMultimap;
@@ -51,14 +46,6 @@ public class HeaderCollection {
public static class Builder {
- // Some sent e-mail have this form : Wed, 3 Jun 2015 09:05:46 +0000 (UTC)
- // Java 8 Time library RFC_1123_DATE_TIME corresponds to Wed, 3 Jun 2015 09:05:46 +0000 only
- // This REGEXP is here to match ( in order to remove ) the possible invalid end of a header date
- // Example of matching patterns :
- // (UTC)
- // (CEST)
- private static final Pattern DATE_SANITIZING_PATTERN = Pattern.compile(" *\\(.*\\) *");
-
private final Set<EMailer> toAddressSet;
private final Set<EMailer> fromAddressSet;
private final Set<EMailer> ccAddressSet;
@@ -114,7 +101,7 @@ public class HeaderCollection {
subjectSet.add(headerValue);
break;
case DATE:
- sentDate = toISODate(headerValue);
+ sentDate = SentDateComparator.toISODate(headerValue);
break;
}
}
@@ -152,29 +139,6 @@ public class HeaderCollection {
}
throw new RuntimeException(headerName + " is not a address header name");
}
-
- private Optional<ZonedDateTime> toISODate(String value) {
- try {
- return Optional.of(ZonedDateTime.parse(
- sanitizeDateStringHeaderValue(value),
- ImapDateTimeFormatter.rfc5322()));
- } catch (Exception e) {
- LOGGER.info("Can not parse receive date " + value);
- return Optional.empty();
- }
- }
-
- @VisibleForTesting String sanitizeDateStringHeaderValue(String value) {
- // Some sent e-mail have this form : Wed, 3 Jun 2015 09:05:46 +0000 (UTC)
- // Java 8 Time library RFC_1123_DATE_TIME corresponds to Wed, 3 Jun 2015 09:05:46 +0000 only
- // This method is here to convert the first date into something parsable by RFC_1123_DATE_TIME DateTimeFormatter
- Matcher sanitizerMatcher = DATE_SANITIZING_PATTERN.matcher(value);
- if (sanitizerMatcher.find()) {
- return value.substring(0 , sanitizerMatcher.start());
- }
- return value;
- }
-
}
public static final String TO = "to";
@@ -185,8 +149,6 @@ public class HeaderCollection {
public static final String SUBJECT = "subject";
public static final String DATE = "date";
- private static final Logger LOGGER = LoggerFactory.getLogger(HeaderCollection.class);
-
public static Builder builder() {
return new Builder();
}
http://git-wip-us.apache.org/repos/asf/james-project/blob/21ce9e82/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/HeaderCollectionTest.java
----------------------------------------------------------------------
diff --git a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/HeaderCollectionTest.java b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/HeaderCollectionTest.java
index b1f8fae..93a7b02 100644
--- a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/HeaderCollectionTest.java
+++ b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/HeaderCollectionTest.java
@@ -267,32 +267,4 @@ public class HeaderCollectionTest {
HeaderCollection.builder().add(null).build();
}
- @Test
- public void sanitizeDateStringHeaderValueShouldRemoveCESTPart() {
- assertThat(HeaderCollection.builder()
- .sanitizeDateStringHeaderValue("Thu, 18 Jun 2015 04:09:35 +0200 (CEST)"))
- .isEqualTo("Thu, 18 Jun 2015 04:09:35 +0200");
- }
-
- @Test
- public void sanitizeDateStringHeaderValueShouldRemoveUTCPart() {
- assertThat(HeaderCollection.builder()
- .sanitizeDateStringHeaderValue("Thu, 18 Jun 2015 04:09:35 +0200 (UTC) "))
- .isEqualTo("Thu, 18 Jun 2015 04:09:35 +0200");
- }
-
- @Test
- public void sanitizeDateStringHeaderValueShouldNotChangeAcceptableString() {
- assertThat(HeaderCollection.builder()
- .sanitizeDateStringHeaderValue("Thu, 18 Jun 2015 04:09:35 +0200"))
- .isEqualTo("Thu, 18 Jun 2015 04:09:35 +0200");
- }
-
- @Test
- public void sanitizeDateStringHeaderValueShouldNotChangeEmptyString() {
- assertThat(HeaderCollection.builder()
- .sanitizeDateStringHeaderValue(""))
- .isEqualTo("");
- }
-
}
http://git-wip-us.apache.org/repos/asf/james-project/blob/21ce9e82/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/comparator/CombinedComparator.java
----------------------------------------------------------------------
diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/comparator/CombinedComparator.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/comparator/CombinedComparator.java
index 38bae1d..53d270f 100644
--- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/comparator/CombinedComparator.java
+++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/comparator/CombinedComparator.java
@@ -95,14 +95,11 @@ public class CombinedComparator implements Comparator<MailboxMessage>{
@Override
public int compare(MailboxMessage o1, MailboxMessage o2) {
- int i = 0;
- for (Comparator<MailboxMessage> comparator : comparators) {
- i = comparator.compare(o1, o2);
- if (i != 0) {
- break;
- }
- }
- return i;
+ return comparators.stream()
+ .map(comparator -> comparator.compare(o1, o2))
+ .filter(result -> result != 0)
+ .findFirst()
+ .orElse(0);
}
}
http://git-wip-us.apache.org/repos/asf/james-project/blob/21ce9e82/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/comparator/SentDateComparator.java
----------------------------------------------------------------------
diff --git a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/comparator/SentDateComparator.java b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/comparator/SentDateComparator.java
index 8a1373e..3a897f7 100644
--- a/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/comparator/SentDateComparator.java
+++ b/mailbox/store/src/main/java/org/apache/james/mailbox/store/search/comparator/SentDateComparator.java
@@ -18,44 +18,69 @@
****************************************************************/
package org.apache.james.mailbox.store.search.comparator;
-import java.io.StringReader;
+import java.time.Instant;
+import java.time.ZonedDateTime;
import java.util.Comparator;
-import java.util.Date;
+import java.util.Optional;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import org.apache.james.mailbox.store.mail.model.MailboxMessage;
-import org.apache.james.mime4j.dom.datetime.DateTime;
-import org.apache.james.mime4j.field.datetime.parser.DateTimeParser;
-import org.apache.james.mime4j.field.datetime.parser.ParseException;
+import org.apache.james.util.date.ImapDateTimeFormatter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.annotations.VisibleForTesting;
/**
* {@link Comparator} which works like stated in RFC5256 2.2 Sent Date
- *
*/
public class SentDateComparator extends AbstractHeaderComparator {
+
public final static Comparator<MailboxMessage> SENTDATE = new SentDateComparator();
+ private static final Logger LOGGER = LoggerFactory.getLogger(SentDateComparator.class);
+ // Some sent e-mail have this form : Wed, 3 Jun 2015 09:05:46 +0000 (UTC)
+ // Java 8 Time library RFC_1123_DATE_TIME corresponds to Wed, 3 Jun 2015 09:05:46 +0000 only
+ // This REGEXP is here to match ( in order to remove ) the possible invalid end of a header date
+ // Example of matching patterns :
+ // (UTC)
+ // (CEST)
+ private static final Pattern DATE_SANITIZING_PATTERN = Pattern.compile(" *\\(.*\\) *");
+
+ public static Optional<ZonedDateTime> toISODate(String value) {
+ try {
+ return Optional.of(ZonedDateTime.parse(
+ sanitizeDateStringHeaderValue(value),
+ ImapDateTimeFormatter.rfc5322()));
+ } catch (Exception e) {
+ LOGGER.info("Can not parse receive date " + value);
+ return Optional.empty();
+ }
+ }
+
+ @VisibleForTesting
+ static String sanitizeDateStringHeaderValue(String value) {
+ // Some sent e-mail have this form : Wed, 3 Jun 2015 09:05:46 +0000 (UTC)
+ // Java 8 Time library RFC_1123_DATE_TIME corresponds to Wed, 3 Jun 2015 09:05:46 +0000 only
+ // This method is here to convert the first date into something parsable by RFC_1123_DATE_TIME DateTimeFormatter
+ Matcher sanitizerMatcher = DATE_SANITIZING_PATTERN.matcher(value);
+ if (sanitizerMatcher.find()) {
+ return value.substring(0 , sanitizerMatcher.start());
+ }
+ return value;
+ }
@Override
public int compare(MailboxMessage o1, MailboxMessage o2) {
- Date date1 = getSentDate(o1);
- Date date2 = getSentDate(o2);
- int i = date1.compareTo(date2);
-
- // sent date was the same so use the uid as tie-breaker
- if (i == 0) {
- return UidComparator.UID.compare(o1, o2);
- }
- return 0;
+ Instant date1 = getSentDate(o1);
+ Instant date2 = getSentDate(o2);
+ return date1.compareTo(date2);
}
- private Date getSentDate(MailboxMessage message) {
+ private Instant getSentDate(MailboxMessage message) {
final String value = getHeaderValue("Date", message);
- final StringReader reader = new StringReader(value);
- try {
- DateTime dateTime = new DateTimeParser(reader).parseAll();
- return dateTime.getDate();
- } catch (ParseException e) {
- // if we can not parse the date header we should use the internaldate as fallback
- return message.getInternalDate();
- }
+ return toISODate(value)
+ .map(ZonedDateTime::toInstant)
+ .orElse(message.getInternalDate().toInstant());
}
}
http://git-wip-us.apache.org/repos/asf/james-project/blob/21ce9e82/mailbox/store/src/test/java/org/apache/james/mailbox/store/search/comparator/SentDateComparatorTest.java
----------------------------------------------------------------------
diff --git a/mailbox/store/src/test/java/org/apache/james/mailbox/store/search/comparator/SentDateComparatorTest.java b/mailbox/store/src/test/java/org/apache/james/mailbox/store/search/comparator/SentDateComparatorTest.java
new file mode 100644
index 0000000..ddd5a63
--- /dev/null
+++ b/mailbox/store/src/test/java/org/apache/james/mailbox/store/search/comparator/SentDateComparatorTest.java
@@ -0,0 +1,68 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ ****************************************************************/
+
+package org.apache.james.mailbox.store.search.comparator;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import org.junit.Test;
+
+public class SentDateComparatorTest {
+ @Test
+ public void sanitizeDateStringHeaderValueShouldRemoveCESTPart() {
+ assertThat(
+ SentDateComparator.sanitizeDateStringHeaderValue("Thu, 18 Jun 2015 04:09:35 +0200 (CEST)"))
+ .isEqualTo("Thu, 18 Jun 2015 04:09:35 +0200");
+ }
+
+ @Test
+ public void sanitizeDateStringHeaderValueShouldRemoveUTCPart() {
+ assertThat(
+ SentDateComparator.sanitizeDateStringHeaderValue("Thu, 18 Jun 2015 04:09:35 +0200 (UTC) "))
+ .isEqualTo("Thu, 18 Jun 2015 04:09:35 +0200");
+ }
+
+ @Test
+ public void sanitizeDateStringHeaderValueShouldNotChangeAcceptableString() {
+ assertThat(
+ SentDateComparator.sanitizeDateStringHeaderValue("Thu, 18 Jun 2015 04:09:35 +0200"))
+ .isEqualTo("Thu, 18 Jun 2015 04:09:35 +0200");
+ }
+
+ @Test
+ public void sanitizeDateStringHeaderValueShouldRemoveBrackets() {
+ assertThat(
+ SentDateComparator.sanitizeDateStringHeaderValue("invalid (removeMe)"))
+ .isEqualTo("invalid");
+ }
+
+ @Test
+ public void sanitizeDateStringHeaderValueShouldKeepUnclosedBrackets() {
+ assertThat(
+ SentDateComparator.sanitizeDateStringHeaderValue("invalid (removeMe"))
+ .isEqualTo("invalid (removeMe");
+ }
+
+ @Test
+ public void sanitizeDateStringHeaderValueShouldNotChangeEmptyString() {
+ assertThat(
+ SentDateComparator.sanitizeDateStringHeaderValue(""))
+ .isEqualTo("");
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscribe@james.apache.org
For additional commands, e-mail: server-dev-help@james.apache.org