You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2023/01/19 11:26:41 UTC

[tika] branch main updated: TIKA-3957 (#910)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new ac9c0f899 TIKA-3957 (#910)
ac9c0f899 is described below

commit ac9c0f899a2226d2196ca67eca06d499bf9f8e74
Author: Tim Allison <ta...@apache.org>
AuthorDate: Thu Jan 19 06:26:33 2023 -0500

    TIKA-3957 (#910)
    
    * TIKA-3957 -- refactor date parsing out of the MailContentHandler and prefer thread-safe DateTimeFormatters over thread-unsafe SimpleDateFormats, add workarounds for at least 2 bugs in jdk8.
---
 tika-parent/pom.xml                                |   2 +-
 .../tika/parser/mailcommons/MailDateParser.java    | 577 ++++++++++++++++++++-
 .../parser/mailcommons/MailDateParserTest.java     | 186 +++++++
 .../tika/parser/mail/MailContentHandler.java       | 125 +----
 .../org/apache/tika/parser/mbox/MboxParser.java    |  13 +-
 .../apache/tika/parser/mail/RFC822ParserTest.java  |  75 +--
 .../tika/parser/microsoft/OutlookExtractor.java    |   7 +-
 7 files changed, 785 insertions(+), 200 deletions(-)

diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index 8c3cefed0..64e2a48ee 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -365,7 +365,7 @@
     <lucene.version>8.11.2</lucene.version>
     <metadata.extractor.version>2.18.0</metadata.extractor.version>
     <microsoft.translator.version>0.6.2</microsoft.translator.version>
-    <!-- 0.8.6 is built with java 11 and does not work with Java 8 -->
+    <!-- 0.8.5 is built with java 11 and does not work with Java 8 -->
     <mime4j.version>0.8.4</mime4j.version>
     <mockito.version>4.11.0</mockito.version>
     <netcdf-java.version>4.5.5</netcdf-java.version>
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-commons/src/main/java/org/apache/tika/parser/mailcommons/MailDateParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-commons/src/main/java/org/apache/tika/parser/mailcommons/MailDateParser.java
index 8ab470074..3a5220710 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-commons/src/main/java/org/apache/tika/parser/mailcommons/MailDateParser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-commons/src/main/java/org/apache/tika/parser/mailcommons/MailDateParser.java
@@ -16,15 +16,584 @@
  */
 package org.apache.tika.parser.mailcommons;
 
+import static java.time.ZoneOffset.UTC;
+import static java.time.temporal.ChronoField.AMPM_OF_DAY;
+import static java.time.temporal.ChronoField.DAY_OF_MONTH;
+import static java.time.temporal.ChronoField.DAY_OF_WEEK;
+import static java.time.temporal.ChronoField.HOUR_OF_AMPM;
+import static java.time.temporal.ChronoField.HOUR_OF_DAY;
+import static java.time.temporal.ChronoField.INSTANT_SECONDS;
+import static java.time.temporal.ChronoField.MILLI_OF_SECOND;
+import static java.time.temporal.ChronoField.MINUTE_OF_HOUR;
+import static java.time.temporal.ChronoField.MONTH_OF_YEAR;
+import static java.time.temporal.ChronoField.OFFSET_SECONDS;
+import static java.time.temporal.ChronoField.SECOND_OF_MINUTE;
+import static java.time.temporal.ChronoField.YEAR;
+import static org.apache.tika.utils.DateUtils.MIDDAY;
+
 import java.text.ParseException;
-import java.text.SimpleDateFormat;
+import java.text.ParsePosition;
+import java.time.DateTimeException;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.ZonedDateTime;
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeFormatterBuilder;
+import java.time.format.DateTimeParseException;
+import java.time.format.ResolverStyle;
+import java.time.format.SignStyle;
+import java.time.temporal.ChronoField;
+import java.time.temporal.TemporalAccessor;
 import java.util.Date;
+import java.util.HashMap;
 import java.util.Locale;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.tika.utils.StringUtils;
 
+/**
+ * Dates in emails are a mess.  There are at least two major date related bugs in JDK 8.
+ * This class does its best to parse date strings.  It does have a US-based date bias.
+ * Please open a ticket to fix this.  We can also add overrides via the parser config
+ * to manage custom dates.
+ */
 public class MailDateParser {
-    public static Date parseDate(String headerContent) throws ParseException {
-        SimpleDateFormat dateFormat = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss Z", Locale.US);
-        return dateFormat.parse(headerContent);
+
+    //Matches a "UTC"/"GMT" marker followed by a numeric offset, anchored at the end of
+    //the input (\Z). Groups: (1) sign, (2) one- or two-digit hour, (3) optional
+    //two-digit minutes.
+    //TIKA-1970 Mac Mail's format is GMT+1 so we need to check for hour only
+    //Also, there are numerous bugs in jdk 8 with localized offsets
+    //so we need to get rid of the GMT/UTC component (e.g. https://bugs.openjdk.org/browse/JDK-8154520)
+    private static final Pattern LOCALIZED_OFFSET_PATTERN =
+            Pattern.compile("(?:UTC|GMT)\\s*([-+])\\s*(\\d?\\d):?(\\d\\d)?\\Z");
+
+    //Matches a sign, optional whitespace, a one- or two-digit hour, an optional colon and
+    //two-digit minutes. normalize() cuts the text off at the end of the first match;
+    //this is used to strip junk after a fairly full offset:
+    // Wed, 26 Jan 2022 09:14:37 +0100 (CET)
+    private static final Pattern OFFSET_PATTERN =
+            Pattern.compile("[-+]\\s*\\d?\\d:?\\d\\d");
+
+    //Upper-case day-of-week names and abbreviations, preceded by the start of the input
+    //or a space and followed by a space. The pattern is case sensitive; normalize()
+    //upper-cases the text before applying it.
+    private static final Pattern DAYS_OF_WEEK =
+            Pattern.compile("(?:\\A| )(MON|MONDAY|TUE|TUES|TUESDAY|WED|WEDNESDAY|THU|THUR|THURS" +
+                    "|THURSDAY|FRI|FRIDAY|SAT|SATURDAY|SUN|SUNDAY) ");
+
+    //find a time ending in am/pm without a space: 10:30am and
+    //use this pattern to insert space: 10:30 am
+    //Groups: (1) the digit before the marker, (2) the am/pm marker (case insensitive).
+    private static final Pattern AM_PM = Pattern.compile("(?i)(\\d)([ap]m)\\b");
+
+    /**
+     * Builds the lookup from month number to upper-case three-letter abbreviation
+     * (1 = JAN ... 12 = DEC). Taken nearly directly from mime4j.
+     *
+     * @return a new mutable map keyed by month-of-year
+     */
+    private static Map<Long, String> monthOfYear() {
+        String[] abbreviations = {
+                "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
+                "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
+        };
+        HashMap<Long, String> months = new HashMap<>();
+        for (int i = 0; i < abbreviations.length; i++) {
+            //keys are 1-based month numbers
+            months.put(Long.valueOf(i + 1L), abbreviations[i]);
+        }
+        return months;
+    }
+
+    /**
+     * Builds the lookup from day-of-week number to upper-case three-letter abbreviation
+     * (1 = MON ... 7 = SUN).
+     *
+     * @return a new mutable map keyed by day-of-week
+     */
+    private static Map<Long, String> dayOfWeek() {
+        String[] abbreviations = {"MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN"};
+        HashMap<Long, String> days = new HashMap<>();
+        for (int i = 0; i < abbreviations.length; i++) {
+            //keys are 1-based day numbers
+            days.put(Long.valueOf(i + 1L), abbreviations[i]);
+        }
+        return days;
     }
 
+    //base value handed to appendValueReduced(YEAR, 2, 4, ...) by the formatters below
+    private static final int INITIAL_YEAR = 1970;
+
+    //Optional trailing zone information shared by the lenient formatters: an optional
+    //space followed by a zone id, a zone offset and/or a zone name. Each part sits in
+    //its own optional section, so none of them is required.
+    private static final DateTimeFormatter TIME_ZONE_FORMATTER
+            = new DateTimeFormatterBuilder()
+            .parseCaseInsensitive()
+            .parseLenient()
+            .optionalStart()
+            .appendLiteral(' ') //optional space before any of the time zone offset/ids
+            .optionalEnd()
+            .optionalStart()
+            .appendZoneId()
+            .optionalEnd()
+            .optionalStart()
+            //zone offset with 'Z' for zero (pattern letter X; the localized offset
+            //letter would be O), e.g. Z; -08; -0830; -08:30; -083015; -08:30:15
+            //NOTE(review): the colon/seconds forms presumably rely on lenient parsing
+            // -- confirm on the target JDK
+            .appendPattern("X")
+            .optionalEnd()
+            .optionalStart()
+            .appendPattern("z")//zone name, e.g. PST
+            .optionalEnd().toFormatter(Locale.US);
+
+
+    /**
+     * Formatter for RFC 5322 style date-times: an optional day-of-week followed by
+     * {@code ", "}, then {@code d MMM yy[yy] HH:mm[:ss][.SSS][ offset]}.
+     * Day and month texts come from the upper-case {@code dayOfWeek()} and
+     * {@code monthOfYear()} maps; parsing is case insensitive and lenient.
+     * DAY_OF_WEEK is parsed but is not among the resolver fields.
+     */
+    public static final DateTimeFormatter RFC_5322 = new DateTimeFormatterBuilder()
+            .parseCaseInsensitive()
+            .parseLenient()
+            .optionalStart()
+            .appendText(DAY_OF_WEEK, dayOfWeek())
+            .appendLiteral(", ")
+            .optionalEnd()
+            .appendValue(DAY_OF_MONTH, 1, 2, SignStyle.NOT_NEGATIVE)
+            .appendLiteral(' ')
+            .appendText(MONTH_OF_YEAR, monthOfYear())
+            .appendLiteral(' ')
+            //two to four digit year; reduced values are based at INITIAL_YEAR
+            .appendValueReduced(YEAR, 2, 4, INITIAL_YEAR)
+            .appendLiteral(' ')
+            .appendValue(HOUR_OF_DAY, 2)
+            .appendLiteral(':')
+            .appendValue(MINUTE_OF_HOUR, 2)
+            .optionalStart()
+            .appendLiteral(':')
+            .appendValue(SECOND_OF_MINUTE, 2)
+            .optionalEnd()
+            .optionalStart()
+            .appendLiteral('.')
+            .appendValue(MILLI_OF_SECOND, 3)
+            .optionalEnd()
+            .optionalStart()
+            .appendLiteral(' ')
+            //"GMT" is the text that stands for a zero offset
+            .appendOffset("+HHMM", "GMT")
+            .optionalEnd()
+            .toFormatter(Locale.US)
+            //.withZone(ZoneId.of("GMT")) see TIKA-3735
+            .withResolverStyle(ResolverStyle.LENIENT)
+            .withResolverFields(DAY_OF_MONTH, MONTH_OF_YEAR, YEAR, HOUR_OF_DAY, MINUTE_OF_HOUR,
+                    SECOND_OF_MINUTE, MILLI_OF_SECOND, OFFSET_SECONDS);
+
+    /**
+     * More forgiving relative of {@link #RFC_5322} with no day-of-week section:
+     * {@code d MMM yy[yy] H:mm[:ss][.SSS][zone]}. Hour and minute accept one or two
+     * digits, the fraction three to five digits, and the optional zone part is
+     * delegated to {@code TIME_ZONE_FORMATTER}.
+     */
+    public static final DateTimeFormatter RFC_5322_LENIENT = new DateTimeFormatterBuilder()
+            .parseCaseInsensitive()
+            .parseLenient()
+            .appendValue(DAY_OF_MONTH, 1, 2, SignStyle.NEVER)
+            .appendLiteral(' ')
+            .appendPattern("MMM")
+            .appendLiteral(' ')
+            .appendValueReduced(YEAR, 2, 4, INITIAL_YEAR)
+            .appendLiteral(' ')
+            .appendValue(HOUR_OF_DAY, 1, 2, SignStyle.NEVER)
+            .appendLiteral(':')
+            .appendValue(MINUTE_OF_HOUR, 1, 2, SignStyle.NEVER)
+            .optionalStart()
+            .appendLiteral(':')
+            .appendValue(SECOND_OF_MINUTE, 2)
+            .optionalEnd()
+            .optionalStart()
+            .appendLiteral('.')
+            .appendValue(MILLI_OF_SECOND, 3, 5, SignStyle.NEVER)
+            .optionalEnd()
+            .optionalStart()
+            .append(TIME_ZONE_FORMATTER)
+            .optionalEnd()
+            .toFormatter(Locale.US)
+            //.withZone(ZoneId.of("GMT")) see TIKA-3735
+            .withResolverStyle(ResolverStyle.LENIENT)
+            .withResolverFields(DAY_OF_MONTH, MONTH_OF_YEAR, YEAR,
+                    HOUR_OF_DAY, MINUTE_OF_HOUR,
+                    SECOND_OF_MINUTE, MILLI_OF_SECOND, OFFSET_SECONDS);
+
+
+    /**
+     * Twelve-hour-clock counterpart of {@link #RFC_5322_LENIENT}:
+     * {@code d MMM yy[yy] h:mm[:ss][.SSS][ ]am/pm[zone]}. It differs from
+     * RFC_5322_LENIENT only in reading the hour as HOUR_OF_AMPM and in requiring an
+     * am/pm marker (optionally preceded by a space) before the optional zone part.
+     */
+    public static final DateTimeFormatter RFC_5322_AMPM_LENIENT = new DateTimeFormatterBuilder()
+            .parseCaseInsensitive()
+            .parseLenient()
+            .appendValue(DAY_OF_MONTH, 1, 2, SignStyle.NEVER)
+            .appendLiteral(' ')
+            .appendPattern("MMM")
+            .appendLiteral(' ')
+            .appendValueReduced(YEAR, 2, 4, INITIAL_YEAR)
+            .appendLiteral(' ')
+            .appendValue(ChronoField.HOUR_OF_AMPM, 1, 2, SignStyle.NEVER)
+            .appendLiteral(':')
+            .appendValue(MINUTE_OF_HOUR, 1, 2, SignStyle.NEVER)
+            .optionalStart()
+            .appendLiteral(':')
+            .appendValue(SECOND_OF_MINUTE, 2)
+            .optionalEnd()
+            .optionalStart()
+            .appendLiteral('.')
+            .appendValue(MILLI_OF_SECOND, 3, 5, SignStyle.NEVER)
+            .optionalEnd()
+            .optionalStart()
+            .appendLiteral(' ') //optional space before am/pm
+            .optionalEnd()
+            .appendText(ChronoField.AMPM_OF_DAY)
+            //a single optional section is enough here; a second, never-closed
+            //optionalStart() only nested the same section one level deeper
+            .optionalStart()
+            .append(TIME_ZONE_FORMATTER)
+            .optionalEnd()
+            .toFormatter(Locale.US)
+            //.withZone(ZoneId.of("GMT")) see TIKA-3735
+            .withResolverStyle(ResolverStyle.LENIENT)
+            .withResolverFields(DAY_OF_MONTH, MONTH_OF_YEAR, YEAR, HOUR_OF_AMPM, AMPM_OF_DAY,
+                    MINUTE_OF_HOUR,
+                    SECOND_OF_MINUTE, MILLI_OF_SECOND, OFFSET_SECONDS);
+
+
+    /**
+     * Month-first, twelve-hour-clock date-time:
+     * {@code MMM d yy[yy] h:mm[:ss][.SSS][ ]am/pm[zone]}. The am/pm marker is required;
+     * the optional zone part is delegated to {@code TIME_ZONE_FORMATTER}.
+     */
+    public static final DateTimeFormatter MMM_D_YYYY_HH_MM_AM_PM = // "July 9 2012 10:10:10 am UTC"
+            new DateTimeFormatterBuilder()
+                    .parseCaseInsensitive()
+                    .parseLenient()
+                    .appendPattern("MMM")
+                    .appendLiteral(' ')
+                    .appendValue(DAY_OF_MONTH, 1, 2, SignStyle.NEVER)
+                    .appendLiteral(' ')
+                    .appendValueReduced(YEAR, 2, 4, INITIAL_YEAR)
+                    .appendLiteral(' ')
+                    .appendValue(ChronoField.HOUR_OF_AMPM, 1, 2, SignStyle.NEVER)
+                    .appendLiteral(':')
+                    .appendValue(MINUTE_OF_HOUR, 1, 2, SignStyle.NEVER)
+                    .optionalStart()
+                    .appendLiteral(':')
+                    .appendValue(SECOND_OF_MINUTE, 2)
+                    .optionalEnd()
+                    .optionalStart()
+                    .appendLiteral('.')
+                    .appendValue(MILLI_OF_SECOND, 3, 5, SignStyle.NEVER)
+                    .optionalEnd()
+                    .optionalStart()
+                    .appendLiteral(' ') //optional space before am/pm
+                    .optionalEnd()
+                    .appendText(ChronoField.AMPM_OF_DAY)
+                    .optionalStart()
+                    .append(TIME_ZONE_FORMATTER)
+                    .optionalEnd()
+                    .toFormatter(Locale.US)
+                    //.withZone(ZoneId.of("GMT")) see TIKA-3735
+                    .withResolverStyle(ResolverStyle.LENIENT)
+                    .withResolverFields(DAY_OF_MONTH, MONTH_OF_YEAR, YEAR, HOUR_OF_AMPM, AMPM_OF_DAY,
+                            MINUTE_OF_HOUR,
+                            SECOND_OF_MINUTE, MILLI_OF_SECOND, OFFSET_SECONDS);
+
+    /**
+     * Month-first, twenty-four-hour-clock date-time:
+     * {@code MMM d yy[yy] H:mm[:ss][.SSS][zone]}. The optional zone part is delegated to
+     * {@code TIME_ZONE_FORMATTER}.
+     */
+    public static final DateTimeFormatter MMM_D_YYYY_HH_MM = // "July 9 2012 10:10:10 UTC"
+            new DateTimeFormatterBuilder()
+                    .parseCaseInsensitive()
+                    .parseLenient()
+                    .appendPattern("MMM")
+                    .appendLiteral(' ')
+                    .appendValue(DAY_OF_MONTH, 1, 2, SignStyle.NEVER)
+                    .appendLiteral(' ')
+                    .appendValueReduced(YEAR, 2, 4, INITIAL_YEAR)
+                    .appendLiteral(' ')
+                    .appendValue(HOUR_OF_DAY, 1, 2, SignStyle.NEVER)
+                    .appendLiteral(':')
+                    .appendValue(MINUTE_OF_HOUR, 1, 2, SignStyle.NEVER)
+                    .optionalStart()
+                    .appendLiteral(':')
+                    .appendValue(SECOND_OF_MINUTE, 2)
+                    .optionalEnd()
+                    .optionalStart()
+                    .appendLiteral('.')
+                    .appendValue(MILLI_OF_SECOND, 3, 5, SignStyle.NEVER)
+                    .optionalEnd()
+                    //a single optional section is enough here; a second, never-closed
+                    //optionalStart() only nested the same section one level deeper
+                    .optionalStart()
+                    .append(TIME_ZONE_FORMATTER)
+                    .optionalEnd()
+                    .toFormatter(Locale.US)
+                    //.withZone(ZoneId.of("GMT")) see TIKA-3735
+                    .withResolverStyle(ResolverStyle.LENIENT)
+                    .withResolverFields(DAY_OF_MONTH, MONTH_OF_YEAR, YEAR, HOUR_OF_DAY,
+                            MINUTE_OF_HOUR,
+                            SECOND_OF_MINUTE, MILLI_OF_SECOND, OFFSET_SECONDS);
+
+    /**
+     * Slash-separated date with a twenty-four-hour-clock time:
+     * {@code M/d/yy[yy] H[:mm][:ss][.SSS][zone]}, e.g. "7/9/2012 10:10:10".
+     * US-based month/day ordering! Unlike the text-month formatters, the minutes are
+     * optional here.
+     */
+    public static final DateTimeFormatter MM_SLASH_DD_SLASH_YY_HH_MM =
+            new DateTimeFormatterBuilder()
+                    .parseCaseInsensitive()
+                    .parseLenient()
+                    .appendValue(MONTH_OF_YEAR, 1, 2, SignStyle.NEVER)
+                    .appendLiteral('/')
+                    .appendValue(DAY_OF_MONTH, 1, 2, SignStyle.NEVER)
+                    .appendLiteral('/')
+                    .appendValueReduced(YEAR, 2, 4, INITIAL_YEAR)
+                    .appendLiteral(' ')
+                    .appendValue(HOUR_OF_DAY, 1, 2, SignStyle.NEVER)
+                    .optionalStart()
+                    .appendLiteral(':')
+                    .appendValue(MINUTE_OF_HOUR, 1, 2, SignStyle.NEVER)
+                    .optionalEnd()
+                    .optionalStart()
+                    .appendLiteral(':')
+                    .appendValue(SECOND_OF_MINUTE, 2)
+                    .optionalEnd()
+                    .optionalStart()
+                    .appendLiteral('.')
+                    .appendValue(MILLI_OF_SECOND, 3, 5, SignStyle.NEVER)
+                    .optionalEnd()
+                    .optionalStart()
+                    .append(TIME_ZONE_FORMATTER)
+                    .optionalEnd()
+                    .toFormatter(Locale.US)
+                    //.withZone(ZoneId.of("GMT")) see TIKA-3735
+                    .withResolverStyle(ResolverStyle.LENIENT)
+                    .withResolverFields(DAY_OF_MONTH, MONTH_OF_YEAR, YEAR, HOUR_OF_DAY,
+                            MINUTE_OF_HOUR,
+                            SECOND_OF_MINUTE, MILLI_OF_SECOND, OFFSET_SECONDS);
+    /**
+     * Slash-separated date with a twelve-hour-clock time:
+     * {@code M/d/yy[yy] h[:mm][:ss][.SSS][ ]am/pm[zone]}, e.g. "7/9/2012 10:10:10 AM UTC".
+     * US-based month/day ordering! The am/pm marker is required.
+     */
+    public static final DateTimeFormatter MM_SLASH_DD_SLASH_YY_HH_MM_AM_PM =
+            new DateTimeFormatterBuilder()
+                    .parseCaseInsensitive()
+                    .parseLenient()
+                    .appendValue(MONTH_OF_YEAR, 1, 2, SignStyle.NEVER)
+                    .appendLiteral('/')
+                    .appendValue(DAY_OF_MONTH, 1, 2, SignStyle.NEVER)
+                    .appendLiteral('/')
+                    .appendValueReduced(YEAR, 2, 4, INITIAL_YEAR)
+                    .appendLiteral(' ')
+                    .appendValue(HOUR_OF_AMPM, 1, 2, SignStyle.NEVER)
+                    .optionalStart()
+                    .appendLiteral(':')
+                    .appendValue(MINUTE_OF_HOUR, 1, 2, SignStyle.NEVER)
+                    .optionalEnd()
+                    .optionalStart()
+                    .appendLiteral(':')
+                    .appendValue(SECOND_OF_MINUTE, 2)
+                    .optionalEnd()
+                    .optionalStart()
+                    .appendLiteral('.')
+                    .appendValue(MILLI_OF_SECOND, 3, 5, SignStyle.NEVER)
+                    .optionalEnd()
+                    .optionalStart()
+                    .appendLiteral(' ') //optional space before am/pm
+                    .optionalEnd()
+                    .appendText(AMPM_OF_DAY)
+                    .optionalStart()
+                    .append(TIME_ZONE_FORMATTER)
+                    .optionalEnd()
+                    .toFormatter(Locale.US)
+                    //.withZone(ZoneId.of("GMT")) see TIKA-3735
+                    .withResolverStyle(ResolverStyle.LENIENT)
+                    .withResolverFields(DAY_OF_MONTH, MONTH_OF_YEAR, YEAR, HOUR_OF_AMPM,
+                            AMPM_OF_DAY,
+                            MINUTE_OF_HOUR,
+                            SECOND_OF_MINUTE, MILLI_OF_SECOND, OFFSET_SECONDS);
+
+    /**
+     * ISO-like date-time with a space between date and time:
+     * {@code yyyy-MM-dd H:mm[:ss][.SSS][zone]}. The optional zone part is delegated to
+     * {@code TIME_ZONE_FORMATTER}.
+     */
+    public static final DateTimeFormatter YYYY_MM_DD_HH_MM = // "2012-10-10 10:10:10 UTC"
+            new DateTimeFormatterBuilder()
+                    .parseCaseInsensitive()
+                    .parseLenient()
+                    .appendValue(YEAR, 4)
+                    .appendLiteral('-')
+                    .appendValue(MONTH_OF_YEAR, 2, 2, SignStyle.NEVER)
+                    .appendLiteral('-')
+                    .appendValue(DAY_OF_MONTH, 2, 2, SignStyle.NEVER)
+                    .appendLiteral(' ')
+                    .appendValue(HOUR_OF_DAY, 1, 2, SignStyle.NEVER)
+                    .appendLiteral(':')
+                    .appendValue(MINUTE_OF_HOUR, 1, 2, SignStyle.NEVER)
+                    .optionalStart()
+                    .appendLiteral(':')
+                    .appendValue(SECOND_OF_MINUTE, 2)
+                    .optionalEnd()
+                    .optionalStart()
+                    .appendLiteral('.')
+                    .appendValue(MILLI_OF_SECOND, 3, 5, SignStyle.NEVER)
+                    .optionalEnd()
+                    .optionalStart()
+                    .append(TIME_ZONE_FORMATTER)
+                    .optionalEnd()
+                    .toFormatter(Locale.US)
+                    //.withZone(ZoneId.of("GMT")) see TIKA-3735
+                    .withResolverStyle(ResolverStyle.LENIENT)
+                    .withResolverFields(DAY_OF_MONTH, MONTH_OF_YEAR, YEAR, HOUR_OF_DAY,
+                            MINUTE_OF_HOUR,
+                            SECOND_OF_MINUTE, MILLI_OF_SECOND, OFFSET_SECONDS);
+
+    /**
+     * Date-only formatter for {@code yyyy-MM-dd}; only day, month and year take part in
+     * resolving.
+     */
+    public static final DateTimeFormatter YYYY_MM_DD = // "2012-10-10"
+            new DateTimeFormatterBuilder()
+                    .parseCaseInsensitive()
+                    .parseLenient()
+                    .appendValue(YEAR, 4)
+                    .appendLiteral('-')
+                    .appendValue(MONTH_OF_YEAR, 2, 2, SignStyle.NEVER)
+                    .appendLiteral('-')
+                    .appendValue(DAY_OF_MONTH, 2, 2, SignStyle.NEVER)
+                    .toFormatter(Locale.US)
+                    //.withZone(ZoneId.of("GMT")) see TIKA-3735
+                    .withResolverStyle(ResolverStyle.LENIENT)
+                    .withResolverFields(DAY_OF_MONTH, MONTH_OF_YEAR, YEAR);
+
+    /**
+     * Date-only formatter for {@code M/d/yy[yy]} (month first). Unlike the formatters
+     * above it is built without parseLenient()/parseCaseInsensitive() and carries an
+     * override zone taken from {@code MIDDAY}.
+     * NOTE(review): MIDDAY comes from DateUtils, which is not shown here -- confirm
+     * which zone it denotes.
+     */
+    public static final DateTimeFormatter MM_SLASH_DD_SLASH_YYYY =
+            new DateTimeFormatterBuilder()
+                    .appendPattern("M/d/")
+                    .appendValueReduced(ChronoField.YEAR, 2, 4, INITIAL_YEAR)
+                    .toFormatter(Locale.US).withZone(MIDDAY.toZoneId());
+
+    /**
+     * Date-only formatter for {@code d/M/yy[yy]} (day first); otherwise built exactly
+     * like {@link #MM_SLASH_DD_SLASH_YYYY}.
+     */
+    public static final DateTimeFormatter DD_SLASH_MM_SLASH_YYYY =
+            new DateTimeFormatterBuilder()
+                    .appendPattern("d/M/")
+                    .appendValueReduced(ChronoField.YEAR, 2, 4, INITIAL_YEAR)
+                    .toFormatter(Locale.US).withZone(MIDDAY.toZoneId());
+    /**
+     * Date-only formatter for {@code MMM d yy[yy]} (text month first), case insensitive
+     * and lenient. No resolver style, resolver fields or zone are set on it.
+     */
+    public static final DateTimeFormatter MMM_DD_YY =
+            new DateTimeFormatterBuilder()
+                    .parseCaseInsensitive()
+                    .parseLenient()
+                    .appendPattern("MMM")
+                    .appendLiteral(' ')
+                    .appendValue(DAY_OF_MONTH, 1, 2, SignStyle.NEVER)
+                    .appendLiteral(' ')
+                    .appendValueReduced(YEAR, 2, 4, INITIAL_YEAR)
+                    .toFormatter(Locale.US);
+
+    /**
+     * Date-only formatter for {@code d MMM yy[yy]} (day first), case insensitive and
+     * lenient. No resolver style, resolver fields or zone are set on it.
+     */
+    public static final DateTimeFormatter DD_MMM_YY =
+            new DateTimeFormatterBuilder()
+                    .parseCaseInsensitive()
+                    .parseLenient()
+                    .appendValue(DAY_OF_MONTH, 1, 2, SignStyle.NEVER)
+                    .appendLiteral(' ')
+                    .appendPattern("MMM")
+                    .appendLiteral(' ')
+                    .appendValueReduced(YEAR, 2, 4, INITIAL_YEAR)
+                    .toFormatter(Locale.US);
+
+    /**
+     * Date-only formatter for {@code yy[yy]/M/d} (year first), with an override zone
+     * taken from {@code MIDDAY}.
+     */
+    public static final DateTimeFormatter YY_SLASH_MM_SLASH_DD =
+            new DateTimeFormatterBuilder()
+                    .appendValueReduced(ChronoField.YEAR, 2, 4, INITIAL_YEAR)
+                    .appendPattern("/M/d")
+                    .toFormatter(Locale.US).withZone(MIDDAY.toZoneId());
+
+
+    //Date-only formatters. parseDateLenient() tries them in this order, and only after
+    //every entry of DATE_TIME_FORMATTERS has failed; the first one that parses wins.
+    private static final DateTimeFormatter[] DATE_FORMATTERS = new DateTimeFormatter[] {
+            DD_MMM_YY,
+            MMM_DD_YY,
+            YYYY_MM_DD,
+            MM_SLASH_DD_SLASH_YYYY,//try American first?
+            DD_SLASH_MM_SLASH_YYYY,//if that fails, try rest of world?
+            YY_SLASH_MM_SLASH_DD
+    };
+
+
+
+    //Date-time formatters. parseDateLenient() tries them in this order and the first
+    //one that parses wins. The RFC_5322 formatter is not part of this list; it is only
+    //used by parseRFC5322().
+    private static final DateTimeFormatter[] DATE_TIME_FORMATTERS = new DateTimeFormatter[] {
+            RFC_5322_LENIENT,
+            RFC_5322_AMPM_LENIENT,
+            MMM_D_YYYY_HH_MM,
+            MMM_D_YYYY_HH_MM_AM_PM,
+            YYYY_MM_DD_HH_MM,
+            MM_SLASH_DD_SLASH_YY_HH_MM,
+            MM_SLASH_DD_SLASH_YY_HH_MM_AM_PM
+
+    };
+    /**
+     * Parses a date header with the {@link #RFC_5322} formatter after trimming and
+     * upper-casing the input. Parsing starts at the beginning of the string but does
+     * not have to consume all of it.
+     * <p>
+     * Known limitation: this fails on "MON, 9 MAY 2016 3:32:00 GMT+0200" ... it stops
+     * short and doesn't include the +0200.
+     *
+     * @param string the raw header value
+     * @return the parsed date
+     * @throws ParseException if the value is null, cannot be parsed, or does not
+     *                        resolve to an instant
+     */
+    public static Date parseRFC5322(String string) throws ParseException {
+        if (string == null) {
+            //used to surface as a NullPointerException from the formatter
+            throw new ParseException("Date string must not be null", 0);
+        }
+        String normalized = string.trim().toUpperCase(Locale.US);
+        try {
+            return Date.from(Instant.from(RFC_5322.parse(normalized, new ParsePosition(0))));
+        } catch (DateTimeParseException e) {
+            throw toParseException(normalized, e.getErrorIndex(), e);
+        } catch (DateTimeException e) {
+            //the text was parsed, but it could not be converted to an instant
+            throw toParseException(normalized, 0, e);
+        }
+    }
+
+    //wraps an unchecked java.time failure in the checked exception this API declares
+    private static ParseException toParseException(String text, int errorOffset,
+                                                   DateTimeException cause) {
+        ParseException parseException =
+                new ParseException("Unparseable date: \"" + text + "\"", errorOffset);
+        parseException.initCause(cause);
+        return parseException;
+    }
+
+    /**
+     * Best-effort parsing of a date string. The text is normalized, then every
+     * date-time formatter is tried in order, followed by every date-only formatter;
+     * the first successful parse is returned.
+     *
+     * @param text the raw date string, may be null
+     * @return the parsed date, or null if {@code text} is null or nothing matched
+     */
+    public static Date parseDateLenient(String text) {
+        if (text == null) {
+            return null;
+        }
+        String candidate = normalize(text);
+        for (DateTimeFormatter formatter : DATE_TIME_FORMATTERS) {
+            Date parsed = tryDateTime(candidate, formatter);
+            if (parsed != null) {
+                return parsed;
+            }
+        }
+        for (DateTimeFormatter formatter : DATE_FORMATTERS) {
+            Date parsed = tryDateOnly(candidate, formatter);
+            if (parsed != null) {
+                return parsed;
+            }
+        }
+        return null;
+    }
+
+    //zoned parse first; on a parse failure retry as a local date-time; null if both fail
+    private static Date tryDateTime(String candidate, DateTimeFormatter formatter) {
+        try {
+            return Date.from(Instant.from(ZonedDateTime.parse(candidate, formatter)));
+        } catch (SecurityException e) {
+            throw e;
+        } catch (DateTimeParseException e) {
+            //There's a bug in java 8: if we include .withZone in the DateTimeFormatter,
+            //that will override the offset/timezone id even if it is included in the
+            //original string. This is fixed in later versions of Java. Once we move to
+            //Java 11, we can get rid of this. Can't make this up...
+            return tryLocalDateTimeAsUtc(candidate, formatter);
+        } catch (Exception e) {
+            //can get StringIndexOutOfBoundsException because of a bug in java 8
+            return null;
+        }
+    }
+
+    //parses without zone information and places the result at UTC; null on failure
+    private static Date tryLocalDateTimeAsUtc(String candidate, DateTimeFormatter formatter) {
+        try {
+            LocalDateTime local = LocalDateTime.parse(candidate, formatter);
+            return Date.from(Instant.from(local.atOffset(UTC)));
+        } catch (SecurityException e) {
+            throw e;
+        } catch (Exception e) {
+            //swallow; the caller moves on to the next formatter
+            return null;
+        }
+    }
+
+    //parses a date without a time and places it at start of day in the MIDDAY zone
+    private static Date tryDateOnly(String candidate, DateTimeFormatter formatter) {
+        try {
+            LocalDate date = LocalDate.from(formatter.parse(candidate));
+            return Date.from(Instant.from(date.atStartOfDay().atZone(MIDDAY.toZoneId())));
+        } catch (SecurityException e) {
+            throw e;
+        } catch (Exception e) {
+            //ignore; the caller moves on to the next formatter
+            return null;
+        }
+    }
+
+    /**
+     * Reports whether INSTANT_SECONDS can be read from the given accessor.
+     * Nothing in the visible part of this class calls this helper.
+     *
+     * @param temporalAccessor the parsed temporal to probe
+     * @return true if the read succeeds, false if it throws a DateTimeException
+     */
+    private static boolean hasInstantSeconds(TemporalAccessor temporalAccessor) {
+        boolean readable = true;
+        try {
+            temporalAccessor.getLong(INSTANT_SECONDS);
+        } catch (DateTimeException e) {
+            readable = false;
+        }
+        return readable;
+    }
+
+    //" at " between date and time, e.g. "16 May 2016 at 09:30:32  GMT+1"
+    private static final Pattern AT_SEPARATOR = Pattern.compile("(?i) at ");
+
+    //any run of whitespace; collapsed to a single space
+    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");
+
+    /**
+     * Rewrites a raw date string into the shape the lenient formatters expect:
+     * upper-cased, commas removed, anything after a numeric offset dropped, a trailing
+     * "UTC+h"/"GMT+hhmm" turned into a plain "+hh:mm" offset, a space inserted before a
+     * glued-on am/pm marker, day-of-week names and " at " removed, and whitespace
+     * collapsed and trimmed.
+     *
+     * @param text the raw date string, must not be null
+     * @return the normalized text
+     */
+    private static String normalize(String text) {
+
+        //DAYS_OF_WEEK only matches upper-case names
+        text = text.toUpperCase(Locale.US);
+
+        //strip out commas
+        text = text.replace(",", "");
+
+        //strip off extra stuff after +0800, e.g. "Mon, 9 May 2016 7:32:00 UTC+0600 (BST)",
+        Matcher matcher = OFFSET_PATTERN.matcher(text);
+        if (matcher.find()) {
+            text = text.substring(0, matcher.end());
+        }
+
+        matcher = LOCALIZED_OFFSET_PATTERN.matcher(text);
+        if (matcher.find()) {
+            text = buildLocalizedOffset(matcher, text);
+        }
+
+        //10:30AM -> 10:30 AM; replaceFirst leaves the text as is when nothing matches
+        text = AM_PM.matcher(text).replaceFirst("$1 $2");
+
+        //The rfc_lenient parser had a problem parsing dates
+        //with days of week missing and a timezone: 9 May 2016 01:32:00 UTC
+        //The day of week is not used in the resolvers, so we may as well throw
+        //out that info
+        text = DAYS_OF_WEEK.matcher(text).replaceAll(" ");
+
+        //16 May 2016 at 09:30:32  GMT+1
+        text = AT_SEPARATOR.matcher(text).replaceAll(" ");
+        //just cause
+        text = WHITESPACE_RUN.matcher(text).replaceAll(" ").trim();
+        return text;
+    }
+
+    /**
+     * Replaces the span matched by LOCALIZED_OFFSET_PATTERN with a plain
+     * {@code +hh:mm} / {@code -hh:mm} offset, keeping the text before and after it.
+     *
+     * @param matcher a matcher positioned on a successful match; group 1 is the sign,
+     *                group 2 the hour and group 3 the optional minutes
+     * @param text    the text the matcher was run against
+     * @return the text with the matched span rewritten
+     */
+    private static String buildLocalizedOffset(Matcher matcher, String text) {
+        String prefix = text.substring(0, matcher.start());
+        String sign = matcher.group(1);// +/-
+        String hours = StringUtils.leftPad(matcher.group(2), 2, '0');//HH
+        String minutes = matcher.group(3);
+        if (minutes == null) {
+            //hour-only offsets such as GMT+1
+            minutes = "00";
+        }
+        String suffix = text.substring(matcher.end());
+        return prefix + sign + hours + ":" + minutes + suffix;
+    }
 }
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-commons/src/test/java/org/apache/tika/parser/mailcommons/MailDateParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-commons/src/test/java/org/apache/tika/parser/mailcommons/MailDateParserTest.java
new file mode 100644
index 000000000..37a7a29d3
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-commons/src/test/java/org/apache/tika/parser/mailcommons/MailDateParserTest.java
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mailcommons;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.text.DateFormat;
+import java.text.DateFormatSymbols;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Locale;
+import java.util.TimeZone;
+
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+public class MailDateParserTest {
+
+    @Test
+    public void testDateTimesWithTimeZones() throws Exception {
+        String expected = "2016-05-09T01:32:00Z";
+
+        //each string below carries a zone or offset and must format, in UTC, to expected
+        for (String dateString : new String[] {
+                // zone designators (GMT, UTC, Z), with and without a day of week:
+                "Mon, 9 May 16 01:32:00 GMT",
+                "9 May 16 01:32:00 GMT",
+                "Monday, 9 May 16 01:32:00 GMT",
+                "Mon, 9 May 2016 01:32:00 UTC",
+                "9 May 2016 01:32:00 UTC",
+                "09 May 2016 01:32:00 UTC",
+                "Mon, 9 May 2016 01:32:00Z",
+                "Mon, 9 May 2016 01:32:00 Z",
+                "Mon, 9 May 2016 01:32:00 GMT",
+                "Mon, 9 May 2016 01:32:00GMT",
+                "Mon, 9 May 2016 01:32:00 UTC",
+                "Mon, 9 May 2016 01:32:00UTC",
+                // GMT/UTC followed by a numeric offset, one with a trailing zone comment:
+                "Mon, 9 May 2016 3:32:00 GMT+0200",
+                "Mon, 9 May 2016 3:32:00 UTC+0200",
+                "Mon, 9 May 2016 7:32:00 UTC+0600 (BST)",
+
+                //numeric offsets and a zone abbreviation; some entries have leading spaces
+                "      Mon, 9 May 2016 3:32:00 +0200",
+                "       9 May 2016 3:32:00 +0200",
+                "Mon, 9 May 2016 3:32:00 +02:00",
+                "9 May 2016 3:32:00 +02:00",
+                "Mon, 9 May 2016 3:32:00+02:00",
+                "Mon, 9 May 2016 3:32:00+0200",
+                "      Sun, 8 May 2016 21:32:00 EST",
+                //NOTE(review): the EST entry only equals 01:32Z at -04:00 (not -05:00); confirm
+                //need to add am/pm format times?  I hope not.
+        }) {
+            testDate(dateString, expected, true);
+        }
+    }
+
+    @Test
+    @Disabled("for dev purposes")
+    public void oneOff() throws Exception {
+        /* Scratch pad for trying one date string by hand; @Disabled, so it is skipped by default.
+        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss z");
+        System.out.println(simpleDateFormat.format(new Date()));
+        DateTimeFormatter formatter = DateTimeFormatter
+                .ofPattern("yyyy-MM-dd'T'HH:mm:ss.S OOOO").withLocale(Locale.US);
+        String date = formatter.format(ZonedDateTime.now(ZoneOffset.UTC));
+        System.out.println("String: " + date);
+        System.out.println("parsed: " + formatter.parse(date) + " from " + date);
+        */
+        String s = "Mon, 6 Sep 2010 05:25:34 -0400 (EDT)";
+        s = "Tue, 9 Jun 2009 23:58:45 -0400";
+
+        //the second assignment wins; swap the two lines above to try the other string
+        try {
+            //turn this back on when we upgrade (presumably mime4j -- TODO confirm)
+            //System.out.println("mime4j: " + DateTimeFieldLenientImpl.RFC_5322.parse(s));
+        } catch (Exception e) {
+            System.out.println("mime4j: null");
+        }
+        try {
+            Date d = MailDateParser.parseDateLenient(s);
+            DateFormat df =
+                    new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", new DateFormatSymbols(Locale.US));
+            df.setTimeZone(TimeZone.getTimeZone("UTC"));
+            String dateString = df.format(d);
+            System.out.println("dev parser lenient: " + dateString);
+        } catch (Exception e) {
+            System.out.println("dev parser lenient: null");
+        }
+    }
+
+    @Test
+    public void testDateTimesWithNoTimeZone() throws Exception {
+        String expected = "2016-05-09T01:32:00Z";
+
+        for (String dateString : new String[]{
+                "Mon, 9 May 2016 01:32:00",
+                "Monday, 9 May 2016 1:32 AM", "May 9 2016 1:32am", "May 9 2016 1:32 am",
+                "2016-05-09 01:32:00"}) {
+            testDate(dateString, expected, true);
+        }
+    }
+
+    @Test
+    public void testDates() throws Exception {
+        //now try days without times
+        String expected = "2016-05-15T12:00:00Z";
+        for (String dateString : new String[]{
+                "May 15, 2016", "Sun, 15 May 2016", "15 May 2016",
+                "2016-05-15"
+        }) {
+            testDate(dateString, expected, true);
+
+        }
+    }
+
+    @Test
+    public void testTrickyDates() throws Exception {
+        //make sure two-digit years are not read literally, e.g. 90 = year 90 A.D., not 1990:
+        //the inputs are meant to fall in 1995-2008, so each must land strictly inside
+        //the (1980-01-01, 2010-01-01) window built below
+        DateFormat df = new SimpleDateFormat("yyyy-MM-dd", new DateFormatSymbols(Locale.US));
+        Date date1980 = df.parse("1980-01-01");
+        Date date2010 = df.parse("2010-01-01");
+
+        for (String dateString : new String[]{
+                "11/14/08",
+                "1/14/08",
+                "1/2/08",
+                "12/1/2008",
+                "12/02/1996",
+                "96/1/02",
+                "96/12/02",
+                "96/12/2",
+                "1996/12/02",
+                "Mon, 29 Jan 96 14:02 GMT",
+                "7/20/95 1:12PM",
+                "08/14/2000  12:48 AM",
+                "8/4/2000  1:48 AM",
+                "06/24/2008, Tuesday, 11 AM",
+                }) {
+            Date parsedDate = MailDateParser.parseDateLenient(dateString);
+            //name the input on failure; assertNotNull throws, so no null check is needed after
+            assertNotNull(parsedDate, "couldn't parse " + dateString);
+            assertTrue(parsedDate.getTime() > date1980.getTime(),
+                    "date must be after 1980:" + dateString + " >> + " + parsedDate);
+            assertTrue(parsedDate.getTime() < date2010.getTime(),
+                    "date must be before 2010: " + dateString + " >> + " + parsedDate);
+        }
+        //TODO: mime4j misparses these to pre 1980 dates
+        //"Wed, 27 Dec 95 11:20:40 EST",
+        //"26 Aug 00 11:14:52 EDT"
+        //
+        //We are still misparsing: 8/1/03 to a pre 1980 date
+
+    }
+
+    //Parses dateString leniently and compares its yyyy-MM-dd'T'HH:mm:ss'Z' rendering to expected.
+    private void testDate(String dateString, String expected, boolean useUTC) throws Exception {
+        Date actual = MailDateParser.parseDateLenient(dateString);
+        assertNotNull(actual, "couldn't parse " + dateString);
+        SimpleDateFormat iso =
+                new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", new DateFormatSymbols(Locale.US));
+        if (useUTC) {
+            iso.setTimeZone(TimeZone.getTimeZone("UTC"));
+        }
+        assertEquals(expected, iso.format(actual), "failed to match: " + dateString);
+    }
+
+}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
index 2a5e76d7d..1ea75cda4 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
@@ -16,15 +16,8 @@
  */
 package org.apache.tika.parser.mail;
 
-import static org.apache.tika.utils.DateUtils.MIDDAY;
-import static org.apache.tika.utils.DateUtils.UTC;
-
 import java.io.IOException;
 import java.io.InputStream;
-import java.text.DateFormat;
-import java.text.DateFormatSymbols;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.List;
@@ -32,9 +25,6 @@ import java.util.Locale;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Stack;
-import java.util.TimeZone;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.io.input.UnsynchronizedByteArrayInputStream;
@@ -47,7 +37,6 @@ import org.apache.james.mime4j.dom.address.AddressList;
 import org.apache.james.mime4j.dom.address.Mailbox;
 import org.apache.james.mime4j.dom.address.MailboxList;
 import org.apache.james.mime4j.dom.field.AddressListField;
-import org.apache.james.mime4j.dom.field.DateTimeField;
 import org.apache.james.mime4j.dom.field.MailboxListField;
 import org.apache.james.mime4j.dom.field.ParsedField;
 import org.apache.james.mime4j.dom.field.UnstructuredField;
@@ -71,6 +60,7 @@ import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.csv.TextAndCSVParser;
 import org.apache.tika.parser.html.HtmlParser;
+import org.apache.tika.parser.mailcommons.MailDateParser;
 import org.apache.tika.parser.mailcommons.MailUtil;
 import org.apache.tika.parser.txt.TXTParser;
 import org.apache.tika.sax.BodyContentHandler;
@@ -86,46 +76,6 @@ class MailContentHandler implements ContentHandler {
 
     private static final String MULTIPART_ALTERNATIVE = "multipart/alternative";
 
-    //TIKA-1970 Mac Mail's format
-    private static final Pattern GENERAL_TIME_ZONE_NO_MINUTES_PATTERN =
-            Pattern.compile("(?:UTC|GMT)([+-])(\\d?\\d)\\Z");
-
-    //find a time ending in am/pm without a space: 10:30am and
-    //use this pattern to insert space: 10:30 am
-    private static final Pattern AM_PM = Pattern.compile("(?i)(\\d)([ap]m)\\b");
-
-    private static final DateFormatInfo[] ALTERNATE_DATE_FORMATS = new DateFormatInfo[] {
-            //note that the string is "cleaned" before processing:
-            //1) condense multiple whitespace to single space
-            //2) trim()
-            //3) strip out commas
-            //4) insert space before am/pm
-            new DateFormatInfo("MMM dd yy hh:mm a"),
-
-            //this is a standard pattern handled by mime4j;
-            //but mime4j fails with leading whitespace
-            new DateFormatInfo("EEE d MMM yy HH:mm:ss Z", UTC),
-
-            new DateFormatInfo("EEE d MMM yy HH:mm:ss z", UTC),
-
-            new DateFormatInfo("EEE d MMM yy HH:mm:ss", null),// no timezone
-
-            new DateFormatInfo("EEEEE MMM d yy hh:mm a", null),// Sunday, May 15 2016 1:32 PM
-
-            //16 May 2016 at 09:30:32  GMT+1 (Mac Mail TIKA-1970)
-            new DateFormatInfo("d MMM yy 'at' HH:mm:ss z", UTC),   // UTC/Zulu
-
-            new DateFormatInfo("yy-MM-dd HH:mm:ss", null),
-
-            new DateFormatInfo("MM/dd/yy hh:mm a", null, false),
-
-            //now dates without times
-            new DateFormatInfo("MMM d yy", MIDDAY, false),
-            new DateFormatInfo("EEE d MMM yy", MIDDAY, false),
-            new DateFormatInfo("d MMM yy", MIDDAY, false),
-            new DateFormatInfo("yy/MM/dd", MIDDAY, false),
-            new DateFormatInfo("MM/dd/yy", MIDDAY, false)};
-
     private final XHTMLContentHandler handler;
     private final Metadata metadata;
     private final ParseContext parseContext;
@@ -154,45 +104,6 @@ class MailContentHandler implements ContentHandler {
         this.detector = detector;
     }
 
-    private static DateFormat createDateFormat(DateFormatInfo dateFormatInfo) {
-        SimpleDateFormat sdf = new SimpleDateFormat(dateFormatInfo.pattern,
-                new DateFormatSymbols(Locale.US));
-        if (dateFormatInfo.timeZone != null) {
-            sdf.setTimeZone(dateFormatInfo.timeZone);
-        }
-        sdf.setLenient(dateFormatInfo.lenient);
-        return sdf;
-    }
-
-    private static Date tryOtherDateFormats(String text) {
-        if (text == null) {
-            return null;
-        }
-        text = text.replaceAll("\\s+", " ").trim();
-        //strip out commas
-        text = text.replaceAll(",", "");
-
-        Matcher matcher = GENERAL_TIME_ZONE_NO_MINUTES_PATTERN.matcher(text);
-        if (matcher.find()) {
-            text = matcher.replaceFirst("GMT$1$2:00");
-        }
-
-        matcher = AM_PM.matcher(text);
-        if (matcher.find()) {
-            text = matcher.replaceFirst("$1 $2");
-        }
-
-        for (DateFormatInfo formatInfo : ALTERNATE_DATE_FORMATS) {
-            try {
-                DateFormat format = createDateFormat(formatInfo);
-                return format.parse(text);
-            } catch (ParseException e) {
-                //continue
-            }
-        }
-        return null;
-    }
-
     @Override
     public void body(BodyDescriptor body, InputStream is) throws MimeException, IOException {
         // use a different metadata object
@@ -431,12 +342,16 @@ class MailContentHandler implements ContentHandler {
                             field.getBody());
                 }
             } else if (fieldname.equalsIgnoreCase("Date")) {
-                DateTimeField dateField = (DateTimeField) parsedField;
-                Date date = dateField.getDate();
-                if (date == null) {
-                    date = tryOtherDateFormats(field.getBody());
+                String dateBody = parsedField.getBody();
+                Date date = null;
+                try {
+                    date = MailDateParser.parseDateLenient(dateBody);
+                    metadata.set(TikaCoreProperties.CREATED, date);
+                } catch (SecurityException e) {
+                    throw e;
+                } catch (Exception e) {
+                    //swallow
                 }
-                metadata.set(TikaCoreProperties.CREATED, date);
             } else {
                 metadata.add(Metadata.MESSAGE_RAW_HEADER_PREFIX + parsedField.getName(),
                         field.getBody());
@@ -649,24 +564,4 @@ class MailContentHandler implements ContentHandler {
             this.bytes = bytes;
         }
     }
-
-    private static class DateFormatInfo {
-        String pattern;
-        TimeZone timeZone;
-        boolean lenient;
-
-        public DateFormatInfo(String pattern) {
-            this(pattern, null, true);
-        }
-
-        public DateFormatInfo(String pattern, TimeZone timeZone) {
-            this(pattern, timeZone, true);
-        }
-
-        public DateFormatInfo(String pattern, TimeZone timeZone, boolean lenient) {
-            this.pattern = pattern;
-            this.timeZone = timeZone;
-            this.lenient = lenient;
-        }
-    }
 }
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mbox/MboxParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mbox/MboxParser.java
index a09a3fcfa..d01ae191a 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mbox/MboxParser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/main/java/org/apache/tika/parser/mbox/MboxParser.java
@@ -16,13 +16,12 @@
  */
 package org.apache.tika.parser.mbox;
 
-import static org.apache.tika.parser.mailcommons.MailDateParser.parseDate;
+import static org.apache.tika.parser.mailcommons.MailDateParser.parseDateLenient;
 
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
-import java.text.ParseException;
 import java.util.Collections;
 import java.util.Date;
 import java.util.HashMap;
@@ -194,9 +193,13 @@ public class MboxParser extends AbstractParser {
             metadata.add(TikaCoreProperties.SUBJECT, headerContent);
         } else if (headerTag.equalsIgnoreCase("Date")) {
             try {
-                Date date = parseDate(headerContent);
-                metadata.set(TikaCoreProperties.CREATED, date);
-            } catch (ParseException e) {
+                Date date = parseDateLenient(headerContent);
+                if (date != null) {
+                    metadata.set(TikaCoreProperties.CREATED, date);
+                }
+            } catch (SecurityException e) {
+                throw e;
+            } catch (Exception e) {
                 // ignoring date because format was not understood
             }
         } else if (headerTag.equalsIgnoreCase("Message-Id")) {
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
index d87fd549b..04b9cd657 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-mail-module/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
@@ -32,13 +32,7 @@ import static org.mockito.Mockito.verify;
 import java.io.ByteArrayInputStream;
 import java.io.InputStream;
 import java.nio.charset.StandardCharsets;
-import java.text.DateFormat;
-import java.text.DateFormatSymbols;
-import java.text.SimpleDateFormat;
-import java.util.Date;
 import java.util.List;
-import java.util.Locale;
-import java.util.TimeZone;
 
 import org.apache.james.mime4j.stream.MimeConfig;
 import org.junit.jupiter.api.BeforeAll;
@@ -378,74 +372,8 @@ public class RFC822ParserTest extends TikaTest {
         r = getXML("testRFC822_eml");
         assertEquals("2016-05-16T08:30:32Z", r.metadata.get(TikaCoreProperties.CREATED));
 
-
-        String expected = "2016-05-15T01:32:00Z";
-
-        int dateNum = 0;
-        for (String dateString : new String[] {
-                // with timezone info:
-                "Sun, 15 May 2016 01:32:00 UTC", "      Sun, 15 May 2016 3:32:00 +0200",
-                // format correctly handled by mime4j if no leading whitespace
-                "      Sun, 14 May 2016 20:32:00 EST",
-                // no timezone info:
-                "Sun, 15 May 2016 01:32:00",
-                "Sunday, May 15 2016 1:32 AM", "May 15 2016 1:32am", "May 15 2016 1:32 am",
-                "2016-05-15 01:32:00", }) {
-            testDate(dateString, expected, dateNum++ < 3);
-        }
-
-        //now try days without times
-        expected = "2016-05-15T12:00:00Z";
-        for (String dateString : new String[]{"May 15, 2016", "Sun, 15 May 2016", "15 May 2016",}) {
-            testDate(dateString, expected, true);
-        }
     }
 
-    @Test
-    public void testTrickyDates() throws Exception {
-        DateFormat df = new SimpleDateFormat("yyyy-MM-dd", new DateFormatSymbols(Locale.US));
-        //make sure there are no mis-parses of e.g. 90 = year 90 A.D, not 1990
-        Date date1980 = df.parse("1980-01-01");
-        for (String dateString : new String[]{"Mon, 29 Jan 96 14:02 GMT", "7/20/95 1:12pm",
-                "08/14/2000  12:48 AM", "06/24/2008, Tuesday, 11 AM", "11/14/08", "12/02/1996",
-                "96/12/02",}) {
-            Date parsedDate = getDate(dateString);
-            if (parsedDate != null) {
-                assertTrue(parsedDate.getTime() > date1980.getTime(),
-                        "date must be after 1980:" + dateString);
-            }
-        }
-        //TODO: mime4j misparses these to pre 1980 dates
-        //"Wed, 27 Dec 95 11:20:40 EST",
-        //"26 Aug 00 11:14:52 EDT"
-        //
-        //We are still misparsing: 8/1/03 to a pre 1980 date
-
-    }
-
-    private void testDate(String dateString, String expected, boolean useUTC) throws Exception {
-        Date parsedDate = getDate(dateString);
-        assertNotNull(parsedDate, "couldn't parse " + dateString);
-        DateFormat df =
-                new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", new DateFormatSymbols(Locale.US));
-        if (useUTC) {
-            df.setTimeZone(TimeZone.getTimeZone("UTC"));
-        }
-        String parsedDateString = df.format(parsedDate);
-        assertEquals(expected, parsedDateString, "failed to match: " + dateString);
-    }
-
-    private Date getDate(String dateString) throws Exception {
-        String mail = "From: dev@tika.apache.org\n" + "Date: " + dateString + "\n";
-        Parser p = new RFC822Parser();
-        Metadata m = new Metadata();
-        try (InputStream is = TikaInputStream.get(mail.getBytes(StandardCharsets.UTF_8))) {
-            p.parse(is, new DefaultHandler(), m, new ParseContext());
-        }
-        return m.getDate(TikaCoreProperties.CREATED);
-    }
-
-
     @Test
     public void testMultipleSubjects() throws Exception {
         //adapted from govdocs1 303710.txt
@@ -615,6 +543,9 @@ public class RFC822ParserTest extends TikaTest {
         List<Metadata> metadataList = getRecursiveMetadata("testRFC822-ARC");
         assertEquals(1, metadataList.size());
         assertEquals("message/rfc822", metadataList.get(0).get(Metadata.CONTENT_TYPE));
+
+        //Also, test that this date has been parsed: Wed, 26 Jan 2022 09:14:37 +0100 (CET)
+        assertTrue(metadataList.get(0).get(TikaCoreProperties.CREATED).startsWith("2022-01-"));
     }
 
     @Test
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
index 4e9ce90e2..5e8b0e77a 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
@@ -23,7 +23,6 @@ import java.io.UnsupportedEncodingException;
 import java.nio.charset.Charset;
 import java.nio.charset.IllegalCharsetNameException;
 import java.nio.charset.UnsupportedCharsetException;
-import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Date;
@@ -257,10 +256,12 @@ public class OutlookExtractor extends AbstractPOIFSExtractor {
 
                             // See if we can parse it as a normal mail date
                             try {
-                                Date d = MailDateParser.parseDate(date);
+                                Date d = MailDateParser.parseDateLenient(date);
                                 metadata.set(TikaCoreProperties.CREATED, d);
                                 metadata.set(TikaCoreProperties.MODIFIED, d);
-                            } catch (ParseException e) {
+                            } catch (SecurityException e ) {
+                                throw e;
+                            } catch (Exception e) {
                                 // Store it as-is, and hope for the best...
                                 metadata.set(TikaCoreProperties.CREATED, date);
                                 metadata.set(TikaCoreProperties.MODIFIED, date);