You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ti...@apache.org on 2022/05/18 19:04:18 UTC

[tika] branch branch_1x updated: TIKA-3770: update mockito

This is an automated email from the ASF dual-hosted git repository.

tilman pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/branch_1x by this push:
     new af3bb5f83 TIKA-3770: update mockito
af3bb5f83 is described below

commit af3bb5f83975aa244e6443fe406b190af7ebcc67
Author: Tilman Hausherr <ti...@snafu.de>
AuthorDate: Wed May 18 21:03:58 2022 +0200

    TIKA-3770: update mockito
---
 .../apache/tika/parser/mail/RFC822ParserTest.java  | 1422 ++++++++++----------
 1 file changed, 711 insertions(+), 711 deletions(-)

diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
index 71db2459b..ae348eef3 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
@@ -1,711 +1,711 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.mail;
-
-import static java.nio.charset.StandardCharsets.US_ASCII;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-import static org.junit.Assume.assumeTrue;
-import static org.mockito.Matchers.any;
-import static org.mockito.Matchers.eq;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.never;
-import static org.mockito.Mockito.times;
-import static org.mockito.Mockito.verify;
-
-import java.io.ByteArrayInputStream;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-import java.text.DateFormat;
-import java.text.DateFormatSymbols;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.List;
-import java.util.Locale;
-
-import org.apache.james.mime4j.stream.MimeConfig;
-import org.apache.tika.TikaTest;
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.extractor.ContainerExtractor;
-import org.apache.tika.extractor.ParserContainerExtractor;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Message;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.PasswordProvider;
-import org.apache.tika.parser.ocr.TesseractOCRParserTest;
-import org.apache.tika.sax.BodyContentHandler;
-import org.apache.tika.sax.RecursiveParserWrapperHandler;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.xml.sax.Attributes;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.helpers.DefaultHandler;
-
-public class RFC822ParserTest extends TikaTest {
-
-    private static InputStream getStream(String name) {
-        InputStream stream = Thread.currentThread().getContextClassLoader()
-                .getResourceAsStream(name);
-        assertNotNull("Test file not found " + name, stream);
-        return stream;
-    }
-
-    //legacy RFC822 behavior...extract every alternative part
-    private static Parser EXTRACT_ALL_ALTERNATIVES_PARSER;
-    private static TikaConfig TIKA_CONFIG;
-
-    @BeforeClass
-    public static void setUp() throws Exception {
-
-        try (InputStream is = getStream("org/apache/tika/parser/mail/tika-config-extract-all-alternatives.xml")) {
-            TIKA_CONFIG = new TikaConfig(is);
-        }
-        EXTRACT_ALL_ALTERNATIVES_PARSER = new AutoDetectParser(TIKA_CONFIG);
-    }
-
-    @Test
-    public void testSimple() throws Exception {
-        Metadata metadata = new Metadata();
-        InputStream stream = getStream("test-documents/testRFC822");
-        ContentHandler handler = mock(DefaultHandler.class);
-
-        try {
-            EXTRACT_ALL_ALTERNATIVES_PARSER.parse(stream, handler, metadata, new ParseContext());
-            verify(handler).startDocument();
-            //just one body
-            verify(handler).startElement(eq(XHTMLContentHandler.XHTML), eq("p"), eq("p"), any(Attributes.class));
-            verify(handler).endElement(XHTMLContentHandler.XHTML, "p", "p");
-            //no multi-part body parts
-            verify(handler, never()).startElement(eq(XHTMLContentHandler.XHTML), eq("div"), eq("div"), any(Attributes.class));
-            verify(handler, never()).endElement(XHTMLContentHandler.XHTML, "div", "div");
-            verify(handler).endDocument();
-            //note no leading spaces, and no quotes
-            assertEquals("Julien Nioche (JIRA) <jira@apache.org>", metadata.get(TikaCoreProperties.CREATOR));
-            assertEquals("[jira] Commented: (TIKA-461) RFC822 messages not parsed",
-                    metadata.get(TikaCoreProperties.TITLE));
-            assertEquals("[jira] Commented: (TIKA-461) RFC822 messages not parsed",
-                    metadata.get(Metadata.SUBJECT));
-        } catch (Exception e) {
-            fail("Exception thrown: " + e.getMessage());
-        }
-    }
-
-    @Test
-    public void testExtendedToFromMetadata() throws Exception {
-        Metadata m = getXML("testRFC822").metadata;
-        assertEquals("Julien Nioche (JIRA)", m.get(Message.MESSAGE_FROM_NAME));
-        assertEquals("jira@apache.org", m.get(Message.MESSAGE_FROM_EMAIL));
-
-        m = getXML("testRFC822-multipart").metadata;
-        assertEquals("DigitalPebble", m.get(Message.MESSAGE_FROM_NAME));
-        assertEquals("julien@digitalpebble.com", m.get(Message.MESSAGE_FROM_EMAIL));
-
-        m = getXML("testRFC822_quoted").metadata;
-        assertEquals("Another Person", m.get(Message.MESSAGE_FROM_NAME));
-        assertEquals("another.person@another-example.com", m.get(Message.MESSAGE_FROM_EMAIL));
-
-        m = getXML("testRFC822_i18nheaders").metadata;
-        assertEquals("Keld Jørn Simonsen", m.get(Message.MESSAGE_FROM_NAME));
-        assertEquals("keld@dkuug.dk", m.get(Message.MESSAGE_FROM_EMAIL));
-
-        //this is currently detected as mbox!!!
-        m = getXML("testEmailWithPNGAtt.eml", new RFC822Parser()).metadata;
-        assertEquals("Tika Test", m.get(Message.MESSAGE_FROM_NAME));
-        assertEquals("XXXX@apache.org", m.get(Message.MESSAGE_FROM_EMAIL));
-
-    }
-
-    @Test
-    public void testMultipart() {
-        Metadata metadata = new Metadata();
-        InputStream stream = getStream("test-documents/testRFC822-multipart");
-        ContentHandler handler = mock(XHTMLContentHandler.class);
-        ParseContext context = new ParseContext();
-        context.set(Parser.class, EXTRACT_ALL_ALTERNATIVES_PARSER);
-        try {
-            EXTRACT_ALL_ALTERNATIVES_PARSER.parse(stream, handler, metadata, context);
-            verify(handler).startDocument();
-            int bodyExpectedTimes = 4, multipackExpectedTimes = 5;
-            // TIKA-1422. TesseractOCRParser interferes with the number of times the handler is invoked.
-            // But, different versions of Tesseract lead to a different number of invocations. So, we
-            // only verify the handler if Tesseract cannot run.
-            if (!TesseractOCRParserTest.canRun()) {
-                verify(handler, times(bodyExpectedTimes)).startElement(eq(XHTMLContentHandler.XHTML), eq("div"), eq("div"), any(Attributes.class));
-                verify(handler, times(bodyExpectedTimes)).endElement(XHTMLContentHandler.XHTML, "div", "div");
-            }
-            verify(handler, times(multipackExpectedTimes)).startElement(eq(XHTMLContentHandler.XHTML), eq("p"), eq("p"), any(Attributes.class));
-            verify(handler, times(multipackExpectedTimes)).endElement(XHTMLContentHandler.XHTML, "p", "p");
-            verify(handler).endDocument();
-
-        } catch (Exception e) {
-            fail("Exception thrown: " + e.getMessage());
-        }
-
-        //repeat, this time looking at content
-        metadata = new Metadata();
-        stream = getStream("test-documents/testRFC822-multipart");
-        handler = new BodyContentHandler();
-        try {
-            EXTRACT_ALL_ALTERNATIVES_PARSER.parse(stream, handler, metadata, context);
-            //tests correct decoding of quoted printable text, including UTF-8 bytes into Unicode
-            String bodyText = handler.toString();
-            assertTrue(bodyText.contains("body 1"));
-            assertTrue(bodyText.contains("body 2"));
-            assertFalse(bodyText.contains("R0lGODlhNgE8AMQAA")); //part of encoded gif
-        } catch (Exception e) {
-            fail("Exception thrown: " + e.getMessage());
-        }
-    }
-
-    @Test
-    public void testQuotedPrintable() {
-        Metadata metadata = new Metadata();
-        InputStream stream = getStream("test-documents/testRFC822_quoted");
-        ContentHandler handler = new BodyContentHandler();
-        ParseContext context = new ParseContext();
-
-        try {
-            EXTRACT_ALL_ALTERNATIVES_PARSER.parse(stream, handler, metadata, context);
-            //tests correct decoding of quoted printable text, including UTF-8 bytes into Unicode
-            String bodyText = handler.toString();
-            assertTrue(bodyText.contains("D\u00FCsseldorf has non-ascii."));
-            assertTrue(bodyText.contains("Lines can be split like this."));
-            assertTrue(bodyText.contains("Spaces at the end of a line \r\nmust be encoded.\r\n"));
-            assertFalse(bodyText.contains("=")); //there should be no escape sequences
-        } catch (Exception e) {
-            fail("Exception thrown: " + e.getMessage());
-        }
-    }
-
-    @Test
-    public void testBase64() throws Exception  {
-        Metadata metadata = new Metadata();
-        InputStream stream = getStream("test-documents/testRFC822_base64");
-        ContentHandler handler = new BodyContentHandler();
-        ParseContext context = new ParseContext();
-        context.set(Parser.class, EXTRACT_ALL_ALTERNATIVES_PARSER);
-        //need to pass in hint.  Autodetects text/plain
-        metadata.set(Metadata.CONTENT_TYPE, "message/rfc822");
-        try {
-            EXTRACT_ALL_ALTERNATIVES_PARSER.parse(stream, handler, metadata, context);
-            //tests correct decoding of base64 text, including ISO-8859-1 bytes into Unicode
-            assertContains("Here is some text, with international characters, voil\u00E0!", handler.toString());
-        } catch (Exception e) {
-            fail("Exception thrown: " + e.getMessage());
-        }
-    }
-
-    @Test
-    public void testI18NHeaders() {
-        Metadata metadata = new Metadata();
-        InputStream stream = getStream("test-documents/testRFC822_i18nheaders");
-        ContentHandler handler = mock(DefaultHandler.class);
-
-        try {
-            EXTRACT_ALL_ALTERNATIVES_PARSER.parse(stream, handler, metadata, new ParseContext());
-            //tests correct decoding of internationalized headers, both
-            //quoted-printable (Q) and Base64 (B).
-            assertEquals("Keld J\u00F8rn Simonsen <keld@dkuug.dk>",
-                    metadata.get(TikaCoreProperties.CREATOR));
-            assertEquals("If you can read this you understand the example.",
-                    metadata.get(TikaCoreProperties.TITLE));
-            assertEquals("If you can read this you understand the example.",
-                    metadata.get(Metadata.SUBJECT));
-        } catch (Exception e) {
-            fail("Exception thrown: " + e.getMessage());
-        }
-    }
-
-    /**
-     * The from isn't in the usual form.
-     * See TIKA-618
-     */
-    @Test
-    public void testUnusualFromAddress() throws Exception {
-        Metadata metadata = new Metadata();
-        InputStream stream = getStream("test-documents/testRFC822_oddfrom");
-        ContentHandler handler = mock(DefaultHandler.class);
-
-        EXTRACT_ALL_ALTERNATIVES_PARSER.parse(stream, handler, metadata, new ParseContext());
-        assertEquals("Saved by Windows Internet Explorer 7",
-                metadata.get(TikaCoreProperties.CREATOR));
-        assertEquals("Air Permit Programs | Air & Radiation | US EPA",
-                metadata.get(TikaCoreProperties.TITLE));
-        assertEquals("Air Permit Programs | Air & Radiation | US EPA",
-                metadata.get(Metadata.SUBJECT));
-    }
-
-    @Test
-    public void testMainBody() throws Exception {
-        //test that the first text or html chunk is processed in the main body
-        //not treated as an attachment. TIKA-2547
-        List<Metadata> metadataList = getRecursiveMetadata("testRFC822_oddfrom");
-        assertEquals(7, metadataList.size());
-        assertContains("Air Quality Planning", metadataList.get(0).get(RecursiveParserWrapperHandler.TIKA_CONTENT));
-
-        //Make sure text alternative doesn't get treated as an attachment
-        metadataList = getRecursiveMetadata("testRFC822_normal_zip");
-        assertEquals(3, metadataList.size());
-        assertContains("This is the HTML part", metadataList.get(0).get(RecursiveParserWrapperHandler.TIKA_CONTENT));
-        assertEquals("application/zip", metadataList.get(2).get(Metadata.CONTENT_TYPE));
-
-        metadataList = getRecursiveMetadata("testRFC822-txt-body");
-        assertEquals(2, metadataList.size());
-        assertContains("body 1", metadataList.get(0).get(RecursiveParserWrapperHandler.TIKA_CONTENT));
-    }
-
-    /**
-     * Test for TIKA-640, increase header max beyond 10k bytes
-     */
-    @Test
-    public void testLongHeader() throws Exception {
-        StringBuilder inputBuilder = new StringBuilder();
-        for (int i = 0; i < 2000; ++i) {
-            inputBuilder.append( //len > 50
-                    "really really really really really really long name ");
-        }
-        String name = inputBuilder.toString();
-        byte[] data = ("Status: 520\r\nFrom: " + name + "\r\n\r\n").getBytes(US_ASCII);
-
-        ContentHandler handler = new DefaultHandler();
-        Metadata metadata = new Metadata();
-        ParseContext context = new ParseContext();
-
-        try {
-            EXTRACT_ALL_ALTERNATIVES_PARSER.parse(
-                    new ByteArrayInputStream(data), handler, metadata, context);
-            fail();
-        } catch (TikaException expected) {
-        }
-
-        MimeConfig config = new MimeConfig.Builder().setMaxHeaderLen(-1).setMaxLineLen(-1).build();
-        context.set(MimeConfig.class, config);
-        EXTRACT_ALL_ALTERNATIVES_PARSER.parse(
-                new ByteArrayInputStream(data), handler, metadata, context);
-        assertEquals(name.trim(), metadata.get(TikaCoreProperties.CREATOR));
-    }
-
-    /**
-     * Test for TIKA-678 - not all headers may be present
-     */
-    @Test
-    public void testSomeMissingHeaders() throws Exception {
-        Metadata metadata = new Metadata();
-        InputStream stream = getStream("test-documents/testRFC822-limitedheaders");
-        ContentHandler handler = new BodyContentHandler();
-        ParseContext context = new ParseContext();
-        context.set(Parser.class, EXTRACT_ALL_ALTERNATIVES_PARSER);
-
-        EXTRACT_ALL_ALTERNATIVES_PARSER.parse(stream, handler, metadata, context);
-        assertEquals(true, metadata.isMultiValued(TikaCoreProperties.CREATOR));
-        assertEquals("xyz", metadata.getValues(TikaCoreProperties.CREATOR)[0]);
-        assertEquals("abc", metadata.getValues(TikaCoreProperties.CREATOR)[1]);
-        assertEquals(true, metadata.isMultiValued(Metadata.MESSAGE_FROM));
-        assertEquals("xyz", metadata.getValues(Metadata.MESSAGE_FROM)[0]);
-        assertEquals("abc", metadata.getValues(Metadata.MESSAGE_FROM)[1]);
-        assertEquals(true, metadata.isMultiValued(Metadata.MESSAGE_TO));
-        assertEquals("abc", metadata.getValues(Metadata.MESSAGE_TO)[0]);
-        assertEquals("def", metadata.getValues(Metadata.MESSAGE_TO)[1]);
-        assertEquals("abcd", metadata.get(TikaCoreProperties.TITLE));
-        assertEquals("abcd", metadata.get(Metadata.SUBJECT));
-        assertContains("bar biz bat", handler.toString());
-    }
-
-    /**
-     * Test TIKA-1028 - If the mail contains an encrypted attachment (or
-     * an attachment that otherwise triggers an error), parsing should carry
-     * on for the remainder regardless
-     */
-    @Test
-    public void testEncryptedZipAttachment() throws Exception {
-        Metadata metadata = new Metadata();
-        ParseContext context = new ParseContext();
-        context.set(Parser.class, EXTRACT_ALL_ALTERNATIVES_PARSER);
-        InputStream stream = getStream("test-documents/testRFC822_encrypted_zip");
-        ContentHandler handler = new BodyContentHandler();
-        EXTRACT_ALL_ALTERNATIVES_PARSER.parse(stream, handler, metadata, context);
-
-        // Check we got the metadata
-        assertEquals("Juha Haaga <ju...@gmail.com>", metadata.get(Metadata.MESSAGE_FROM));
-        assertEquals("Test mail for Tika", metadata.get(TikaCoreProperties.TITLE));
-
-        // Check we got the message text, for both Plain Text and HTML
-        assertContains("Includes encrypted zip file", handler.toString());
-        assertContains("password is \"test\".", handler.toString());
-        assertContains("This is the Plain Text part", handler.toString());
-        assertContains("This is the HTML part", handler.toString());
-
-        // We won't get the contents of the zip file, but we will get the name
-        assertContains("text.txt", handler.toString());
-        assertNotContained("ENCRYPTED ZIP FILES", handler.toString());
-
-        // Try again, this time with the password supplied
-        // Check that we also get the zip's contents as well
-        context.set(PasswordProvider.class, new PasswordProvider() {
-            public String getPassword(Metadata metadata) {
-                return "test";
-            }
-        });
-        stream = getStream("test-documents/testRFC822_encrypted_zip");
-        handler = new BodyContentHandler();
-        EXTRACT_ALL_ALTERNATIVES_PARSER.parse(stream, handler, metadata, context);
-
-        assertContains("Includes encrypted zip file", handler.toString());
-        assertContains("password is \"test\".", handler.toString());
-        assertContains("This is the Plain Text part", handler.toString());
-        assertContains("This is the HTML part", handler.toString());
-
-        // We do get the name of the file in the encrypted zip file
-        assertContains("text.txt", handler.toString());
-
-        // TODO Upgrade to a version of Commons Compress with Encryption
-        //  support, then verify we get the contents of the text file
-        //  held within the encrypted zip
-        assumeTrue(false); // No Zip Encryption support yet
-        assertContains("TEST DATA FOR TIKA.", handler.toString());
-        assertContains("ENCRYPTED ZIP FILES", handler.toString());
-        assertContains("TIKA-1028", handler.toString());
-    }
-
-    /**
-     * Test TIKA-1028 - Ensure we can get the contents of an
-     * un-encrypted zip file
-     */
-    @Test
-    public void testNormalZipAttachment() throws Exception {
-        Metadata metadata = new Metadata();
-        ParseContext context = new ParseContext();
-        context.set(Parser.class, EXTRACT_ALL_ALTERNATIVES_PARSER);
-        InputStream stream = getStream("test-documents/testRFC822_normal_zip");
-        ContentHandler handler = new BodyContentHandler();
-        EXTRACT_ALL_ALTERNATIVES_PARSER.parse(stream, handler, metadata, context);
-
-        // Check we got the metadata
-        assertEquals("Juha Haaga <ju...@gmail.com>", metadata.get(Metadata.MESSAGE_FROM));
-        assertEquals("Test mail for Tika", metadata.get(TikaCoreProperties.TITLE));
-
-        // Check we got the message text, for both Plain Text and HTML
-        assertContains("Includes a normal, unencrypted zip file", handler.toString());
-        assertContains("This is the Plain Text part", handler.toString());
-        assertContains("This is the HTML part", handler.toString());
-
-        // We get both the name and the contents of the file inside the zip
-        assertContains("text.txt", handler.toString());
-        assertContains("TEST DATA FOR TIKA.", handler.toString());
-        assertContains("This is text inside an unencrypted zip file", handler.toString());
-        assertContains("TIKA-1028", handler.toString());
-        assertEquals("<ju...@gmail.com>", metadata.get("Message:Raw-Header:Return-Path"));
-    }
-
-    /**
-     * TIKA-1222 When requested, ensure that the various attachments of
-     * the mail come through properly as embedded resources
-     */
-    @Test
-    public void testGetAttachmentsAsEmbeddedResources() throws Exception {
-        TrackingHandler tracker = new TrackingHandler();
-        ContainerExtractor ex = new ParserContainerExtractor(TIKA_CONFIG);
-        try (TikaInputStream tis = TikaInputStream.get(getStream("test-documents/testRFC822-multipart"))) {
-            assertEquals(true, ex.isSupported(tis));
-            ex.extract(tis, ex, tracker);
-        }
-
-        // Check we found all 3 parts
-        assertEquals(3, tracker.filenames.size());
-        assertEquals(3, tracker.mediaTypes.size());
-
-        // No filenames available
-        assertEquals(null, tracker.filenames.get(0));
-        assertEquals(null, tracker.filenames.get(1));
-        // Except for this using Content-Disposition filename field
-        assertEquals("logo.gif", tracker.filenames.get(2));
-        // Types are available
-        assertEquals(MediaType.TEXT_PLAIN, tracker.mediaTypes.get(0));
-        assertEquals(MediaType.TEXT_HTML, tracker.mediaTypes.get(1));
-        assertEquals(MediaType.image("gif"), tracker.mediaTypes.get(2));
-    }
-
-    @Test
-    public void testDetection() throws Exception {
-        //test simple text file
-        XMLResult r = getXML("testRFC822_date_utf8");
-        assertEquals("message/rfc822", r.metadata.get(Metadata.CONTENT_TYPE));
-
-        //test without extension
-        r = getXML("testRFC822_eml");
-        assertEquals("message/rfc822", r.metadata.get(Metadata.CONTENT_TYPE));
-    }
-
-    @Test
-    public void testDates() throws Exception {
-        //tests non-standard dates that mime4j can't parse
-        XMLResult r = getXML("testRFC822_date_utf8");
-        assertEquals("2016-05-16T08:30:32Z", r.metadata.get(TikaCoreProperties.CREATED));
-
-        r = getXML("testRFC822_eml");
-        assertEquals("2016-05-16T08:30:32Z", r.metadata.get(TikaCoreProperties.CREATED));
-
-
-        String expected = "2016-05-15T01:32:00Z";
-
-        for (String dateString : new String[]{
-                "Sun, 15 May 2016 01:32:00 UTC", //make sure this test basically works
-                "Sun, 15 May 2016 01:32:00", //no timezone
-                "Sunday, May 15 2016 1:32 AM",
-                "May 15 2016 1:32am",
-                "May 15 2016 1:32 am",
-                "2016-05-15 01:32:00",
-                "      Sun, 15 May 2016 3:32:00 +0200",//format correctly handled by mime4j if no leading whitespace
-                "      Sun, 14 May 2016 20:32:00 EST",
-        }) {
-            testDate(dateString, expected);
-        }
-
-        //now try days without times
-        expected = "2016-05-15T12:00:00Z";
-        for (String dateString : new String[]{
-                "May 15, 2016",
-                "Sun, 15 May 2016",
-                "15 May 2016",
-        }) {
-            testDate(dateString, expected);
-        }
-    }
-
-    @Test
-    public void testTrickyDates() throws Exception {
-        DateFormat df = new SimpleDateFormat("yyyy-MM-dd", new DateFormatSymbols(Locale.US));
-        //make sure two-digit years are not mis-parsed, e.g. 90 read as the year 90 A.D. instead of 1990
-        Date date1980 = df.parse("1980-01-01");
-        for (String dateString : new String[] {
-                "Mon, 29 Jan 96 14:02 GMT",
-                "7/20/95 1:12pm",
-                "08/14/2000  12:48 AM",
-                "06/24/2008, Tuesday, 11 AM",
-                "11/14/08",
-                "12/02/1996",
-                "96/12/02",
-        }) {
-            Date parsedDate = getDate(dateString);
-            if (parsedDate != null) {
-                assertTrue("date must be after 1980:"+dateString, parsedDate.getTime() > date1980.getTime());
-            }
-        }
-        //TODO: mime4j misparses these to pre 1980 dates
-        //"Wed, 27 Dec 95 11:20:40 EST",
-        //"26 Aug 00 11:14:52 EDT"
-        //
-        //We are still misparsing: 8/1/03 to a pre 1980 date
-
-    }
-
-    private void testDate(String dateString, String expected) throws Exception {
-        Date parsedDate = getDate(dateString);
-        assertNotNull("couldn't parse " + dateString, parsedDate);
-        DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'",
-                new DateFormatSymbols(Locale.US));
-        String parsedDateString = df.format(parsedDate);
-        assertEquals("failed to match: "+dateString, expected, parsedDateString);
-    }
-
-    private Date getDate(String dateString) throws Exception {
-        String mail = "From: dev@tika.apache.org\n"+
-                "Date: "+dateString+"\n";
-        Parser p = new RFC822Parser();
-        Metadata m = new Metadata();
-        try (InputStream is = TikaInputStream.get(mail.getBytes(StandardCharsets.UTF_8))) {
-            p.parse(is, new DefaultHandler(), m, new ParseContext());
-        }
-        return m.getDate(TikaCoreProperties.CREATED);
-    }
-
-
-    @Test
-    public void testMultipleSubjects() throws Exception {
-        //adapted from govdocs1 303710.txt
-        String s = "From: Shawn Jones [chiroshawn@yahoo.com]\n" +
-                "Subject: 2006N-3502\n" +
-                "Subject: I Urge You to Require Notice of Mercury";
-        Parser p = new RFC822Parser();
-        Metadata m = new Metadata();
-        p.parse(TikaInputStream.get(s.getBytes(StandardCharsets.UTF_8)), new DefaultHandler(), m, new ParseContext());
-        assertEquals("I Urge You to Require Notice of Mercury", m.get(TikaCoreProperties.TITLE));
-    }
-
-
-    @Test
-    public void testExtractAttachments() throws Exception {
-        List<Metadata> metadataList = getRecursiveMetadata("testEmailWithPNGAtt.eml",
-                EXTRACT_ALL_ALTERNATIVES_PARSER);
-        // Check we get the metadata
-        assertEquals("Tika Test <XXXX@apache.org>", metadataList.get(3).get(Metadata.MESSAGE_FROM));
-        assertEquals("Test Attachment Email", metadataList.get(3).get(TikaCoreProperties.TITLE));
-
-        // Check attachments
-        assertEquals(4, metadataList.size());
-        assertEquals("text/plain; charset=UTF-8", metadataList.get(1).get(Metadata.CONTENT_TYPE));
-        assertEquals("image/png", metadataList.get(2).get(Metadata.CONTENT_TYPE));
-        assertEquals("testPNG.png", metadataList.get(2).get(Metadata.RESOURCE_NAME_KEY));
-        assertContains("This email has a PNG attachment included in it",
-                metadataList.get(1).get(RecursiveParserWrapperHandler.TIKA_CONTENT));
-        assertEquals(null, metadataList.get(1).get(Metadata.CONTENT_DISPOSITION));
-        assertEquals("attachment; filename=\"testPNG.png\"", metadataList.get(2).get(Metadata.CONTENT_DISPOSITION));
-    }
-
-    @Test
-    public void testEmbeddedMetadata() throws Exception {
-        List<Metadata> seenMetadata = getRecursiveMetadata("testRFC822-multipart",
-                EXTRACT_ALL_ALTERNATIVES_PARSER);
-
-        assertEquals(4, seenMetadata.size());
-        assertEquals(null, seenMetadata.get(1).get(Metadata.CONTENT_DISPOSITION));
-        assertEquals("text/plain; charset=UTF-8", seenMetadata.get(1).get(Metadata.CONTENT_TYPE));
-        assertEquals("UTF-8", seenMetadata.get(1).get(Metadata.CONTENT_ENCODING));
-        assertEquals(null, seenMetadata.get(2).get(Metadata.CONTENT_DISPOSITION));
-        assertEquals("text/html; charset=UTF-8", seenMetadata.get(2).get(Metadata.CONTENT_TYPE));
-        assertEquals("UTF-8", seenMetadata.get(2).get(Metadata.CONTENT_ENCODING));
-        assertEquals("attachment; filename=\"logo.gif\"", seenMetadata.get(3).get(Metadata.CONTENT_DISPOSITION));
-        assertEquals("logo.gif", seenMetadata.get(3).get(Metadata.RESOURCE_NAME_KEY));
-        assertEquals("image/gif", seenMetadata.get(3).get(Metadata.CONTENT_TYPE));
-    }
-
-    @Test
-    public void testMultipartFlags() throws Exception {
-        List<Metadata> metadataList = getRecursiveMetadata("testRFC822-multipart", EXTRACT_ALL_ALTERNATIVES_PARSER);
-        // Check the root metadata.
-        assertEquals("mixed", metadataList.get(0).get(Message.MULTIPART_SUBTYPE));
-        assertEquals("0016e64606800312ee04913db790", metadataList.get(0).get(Message.MULTIPART_BOUNDARY));
-
-        // Check the metadata of the first alternative.
-        assertTrue(metadataList.get(1).get(Metadata.CONTENT_TYPE).equals("text/plain; charset=UTF-8"));
-        assertTrue(metadataList.get(1).get(Message.MULTIPART_SUBTYPE).equals("alternative"));
-        assertTrue(metadataList.get(1).get(Message.MULTIPART_BOUNDARY).equals("0016e64606800312ea04913db78e"));
-
-        // Check the metadata of the second alternative.
-        assertTrue(metadataList.get(2).get(Metadata.CONTENT_TYPE).equals("text/html; charset=UTF-8"));
-        assertTrue(metadataList.get(2).get(Message.MULTIPART_SUBTYPE).equals("alternative"));
-        assertTrue(metadataList.get(2).get(Message.MULTIPART_BOUNDARY).equals("0016e64606800312ea04913db78e"));
-
-        // Check the metadata of the attached GIF.
-        assertTrue(metadataList.get(3).get(Metadata.CONTENT_TYPE).equals("image/gif"));
-        assertEquals("mixed", metadataList.get(3).get(Message.MULTIPART_SUBTYPE));
-        assertEquals("0016e64606800312ee04913db790", metadataList.get(3).get(Message.MULTIPART_BOUNDARY));
-    }
-
-    @Test
-    public void testBasicAlternativeBodyHandling() throws Exception {
-        /*
-            multi-part/mixed
-                multi-part/alternative
-                    text
-                    html
-                gif
-         */
-        List<Metadata> metadataList = getRecursiveMetadata("testRFC822-multipart");
-        assertEquals(2, metadataList.size());
-        String body = metadataList.get(0).get(RecursiveParserWrapperHandler.TIKA_CONTENT);
-        assertContains("body 2", body);
-        assertNotContained("body 1", body);
-        assertEquals("message/rfc822", metadataList.get(0).get(Metadata.CONTENT_TYPE));
-        assertEquals("image/gif", metadataList.get(1).get(Metadata.CONTENT_TYPE));
-        assertEquals("/logo.gif", metadataList.get(1).get(RecursiveParserWrapperHandler.EMBEDDED_RESOURCE_PATH));
-    }
-
-    @Test
-    public void testMixedRelatedMultipart() throws Exception {
-        /*
-            multipart/mixed (..6)
-                multipart/related (..5)
-                    multipart/alternative  (..4)
-                        text/plain
-                        text/html
-                image/jpeg (inline) Mary with cooler.jpeg  (..5)
-            image/jpeg (attachment) mary-coffee.jpg (..6)
-
-         */
-
-        List<Metadata> metadataList = getRecursiveMetadata("testRFC822-mixed-simple");
-        assertEquals(3, metadataList.size());
-
-        assertContains("body 2", metadataList.get(0).get(RecursiveParserWrapperHandler.TIKA_CONTENT));
-        assertNotContained("body 1", metadataList.get(0).get(RecursiveParserWrapperHandler.TIKA_CONTENT));
-        assertEquals("message/rfc822", metadataList.get(0).get(Metadata.CONTENT_TYPE));
-
-        assertEquals("image/jpeg", metadataList.get(1).get(Metadata.CONTENT_TYPE));
-        assertEquals("/Mary with cooler.jpeg",
-                metadataList.get(1).get(RecursiveParserWrapperHandler.EMBEDDED_RESOURCE_PATH));
-        assertEquals(TikaCoreProperties.EmbeddedResourceType.INLINE.toString(),
-                metadataList.get(1).get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE));
-
-        assertEquals("image/jpeg", metadataList.get(2).get(Metadata.CONTENT_TYPE));
-        assertEquals("/mary-coffee.jpg",
-                metadataList.get(2).get(RecursiveParserWrapperHandler.EMBEDDED_RESOURCE_PATH));
-        assertEquals(TikaCoreProperties.EmbeddedResourceType.ATTACHMENT.toString(),
-                metadataList.get(2).get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE));
-    }
-
-    @Test
-    public void testAlternativeWithComplexMixedChild() throws Exception {
-        /*
-        This tests that both html body chunks are stitched back
-        together in the body text for the main email.
-
-            multi-part/alternative
-                    text
-                    multipart/mixed
-                        html body chunk 1
-                        pdf
-                        html body chunk 2
-
-         */
-        List<Metadata> metadataList = getRecursiveMetadata("testRFC822-mixed-with-pdf-inline");
-        assertEquals(2, metadataList.size());
-        String body = metadataList.get(0).get(RecursiveParserWrapperHandler.TIKA_CONTENT);
-        assertContains("body 2", body);
-        assertContains("body 3", body);
-        assertNotContained("body 1", body);
-        assertEquals("message/rfc822", metadataList.get(0).get(Metadata.CONTENT_TYPE));
-        assertEquals("application/pdf", metadataList.get(1).get(Metadata.CONTENT_TYPE));
-        assertEquals("/tzora-titan-4-hummer-xl-manual.pdf",
-                metadataList.get(1).get(RecursiveParserWrapperHandler.EMBEDDED_RESOURCE_PATH));
-    }
-
-    @Test
-    public void testSimpleBodyInlined() throws Exception {
-        List<Metadata> metadataList = getRecursiveMetadata("testRFC822_simple_inline_body.txt");
-        assertEquals(1, metadataList.size());
-        assertContains("asked", metadataList.get(0).get(RecursiveParserWrapperHandler.TIKA_CONTENT));
-    }
-
-    @Test
-    public void testGroupwise() throws Exception {
-        //TODO -- this should treat attachments as attachments, no?
-        List<Metadata>  metadataList = getRecursiveMetadata("testGroupWiseEml.eml");
-        assertEquals(1, metadataList.size());
-        assertContains("ssssss", metadataList.get(0).get(RecursiveParserWrapperHandler.TIKA_CONTENT));
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mail;
+
+import static java.nio.charset.StandardCharsets.US_ASCII;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.junit.Assume.assumeTrue;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.text.DateFormat;
+import java.text.DateFormatSymbols;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.List;
+import java.util.Locale;
+
+import org.apache.james.mime4j.stream.MimeConfig;
+import org.apache.tika.TikaTest;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.extractor.ContainerExtractor;
+import org.apache.tika.extractor.ParserContainerExtractor;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Message;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.PasswordProvider;
+import org.apache.tika.parser.ocr.TesseractOCRParserTest;
+import org.apache.tika.sax.BodyContentHandler;
+import org.apache.tika.sax.RecursiveParserWrapperHandler;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.helpers.DefaultHandler;
+
+public class RFC822ParserTest extends TikaTest {
+
+    /**
+     * Opens a test resource through the current thread's context class loader.
+     * The calling test fails immediately when the resource cannot be found.
+     *
+     * @param name classpath-relative resource name
+     * @return the open stream; never {@code null}
+     */
+    private static InputStream getStream(String name) {
+        ClassLoader loader = Thread.currentThread().getContextClassLoader();
+        InputStream resource = loader.getResourceAsStream(name);
+        assertNotNull("Test file not found " + name, resource);
+        return resource;
+    }
+
+    //legacy RFC822 behavior...extract every alternative part; assigned once in setUp()
+    private static Parser EXTRACT_ALL_ALTERNATIVES_PARSER;
+    private static TikaConfig TIKA_CONFIG; // loaded in setUp(); the parser above is built from it
+
+    /**
+     * Loads the "extract all alternatives" Tika configuration from the test
+     * classpath and builds the parser shared by the tests in this class.
+     */
+    @BeforeClass
+    public static void setUp() throws Exception {
+        final String configPath =
+                "org/apache/tika/parser/mail/tika-config-extract-all-alternatives.xml";
+        try (InputStream configStream = getStream(configPath)) {
+            TIKA_CONFIG = new TikaConfig(configStream);
+        }
+        EXTRACT_ALL_ALTERNATIVES_PARSER = new AutoDetectParser(TIKA_CONFIG);
+    }
+
+    /**
+     * Parses a single-part message and checks the SAX events emitted for the
+     * body as well as the creator, title and subject metadata.
+     */
+    @Test
+    public void testSimple() throws Exception {
+        Metadata metadata = new Metadata();
+        ContentHandler handler = mock(DefaultHandler.class);
+
+        // The stream is closed once parsing is done. Exceptions are left to propagate
+        // (the method already declares them) so a failure report keeps its stack trace.
+        try (InputStream stream = getStream("test-documents/testRFC822")) {
+            EXTRACT_ALL_ALTERNATIVES_PARSER.parse(stream, handler, metadata, new ParseContext());
+        }
+
+        verify(handler).startDocument();
+        //just one body
+        verify(handler).startElement(eq(XHTMLContentHandler.XHTML), eq("p"), eq("p"), any(Attributes.class));
+        verify(handler).endElement(XHTMLContentHandler.XHTML, "p", "p");
+        //no multi-part body parts
+        verify(handler, never()).startElement(eq(XHTMLContentHandler.XHTML), eq("div"), eq("div"), any(Attributes.class));
+        verify(handler, never()).endElement(XHTMLContentHandler.XHTML, "div", "div");
+        verify(handler).endDocument();
+
+        //note no leading spaces, and no quotes
+        // The full address is the one asserted for Message.MESSAGE_FROM_EMAIL on this same document.
+        assertEquals("Julien Nioche (JIRA) <jira@apache.org>", metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("[jira] Commented: (TIKA-461) RFC822 messages not parsed",
+                metadata.get(TikaCoreProperties.TITLE));
+        assertEquals("[jira] Commented: (TIKA-461) RFC822 messages not parsed",
+                metadata.get(Metadata.SUBJECT));
+    }
+
+    /**
+     * Checks the separate sender name and sender address metadata fields for a
+     * series of sample messages.
+     */
+    @Test
+    public void testExtendedToFromMetadata() throws Exception {
+        assertSender("Julien Nioche (JIRA)", "jira@apache.org",
+                getXML("testRFC822").metadata);
+        assertSender("DigitalPebble", "julien@digitalpebble.com",
+                getXML("testRFC822-multipart").metadata);
+        assertSender("Another Person", "another.person@another-example.com",
+                getXML("testRFC822_quoted").metadata);
+        assertSender("Keld Jørn Simonsen", "keld@dkuug.dk",
+                getXML("testRFC822_i18nheaders").metadata);
+
+        //this is currently detected as mbox!!!
+        assertSender("Tika Test", "XXXX@apache.org",
+                getXML("testEmailWithPNGAtt.eml", new RFC822Parser()).metadata);
+    }
+
+    /**
+     * Asserts the sender name first and the sender e-mail address second.
+     */
+    private static void assertSender(String expectedName, String expectedEmail, Metadata actual) {
+        assertEquals(expectedName, actual.get(Message.MESSAGE_FROM_NAME));
+        assertEquals(expectedEmail, actual.get(Message.MESSAGE_FROM_EMAIL));
+    }
+
+    /**
+     * Parses a multipart message twice: first against a mocked handler to count
+     * the emitted SAX elements, then against a real handler to check the text.
+     */
+    @Test
+    public void testMultipart() throws Exception {
+        ParseContext context = new ParseContext();
+        context.set(Parser.class, EXTRACT_ALL_ALTERNATIVES_PARSER);
+
+        // Streams are closed after each parse; exceptions propagate so that a
+        // failing run reports the full stack trace rather than only a message.
+        ContentHandler mockHandler = mock(XHTMLContentHandler.class);
+        try (InputStream stream = getStream("test-documents/testRFC822-multipart")) {
+            EXTRACT_ALL_ALTERNATIVES_PARSER.parse(stream, mockHandler, new Metadata(), context);
+        }
+        verify(mockHandler).startDocument();
+        // Named after the element each count applies to.
+        int divExpectedTimes = 4;
+        int paragraphExpectedTimes = 5;
+        // TIKA-1422. TesseractOCRParser interferes with the number of times the handler is invoked.
+        // But, different versions of Tesseract lead to a different number of invocations. So, we
+        // only verify the handler if Tesseract cannot run.
+        if (!TesseractOCRParserTest.canRun()) {
+            verify(mockHandler, times(divExpectedTimes)).startElement(eq(XHTMLContentHandler.XHTML), eq("div"), eq("div"), any(Attributes.class));
+            verify(mockHandler, times(divExpectedTimes)).endElement(XHTMLContentHandler.XHTML, "div", "div");
+        }
+        verify(mockHandler, times(paragraphExpectedTimes)).startElement(eq(XHTMLContentHandler.XHTML), eq("p"), eq("p"), any(Attributes.class));
+        verify(mockHandler, times(paragraphExpectedTimes)).endElement(XHTMLContentHandler.XHTML, "p", "p");
+        verify(mockHandler).endDocument();
+
+        //repeat, this time looking at content
+        ContentHandler textHandler = new BodyContentHandler();
+        try (InputStream stream = getStream("test-documents/testRFC822-multipart")) {
+            EXTRACT_ALL_ALTERNATIVES_PARSER.parse(stream, textHandler, new Metadata(), context);
+        }
+        //tests correct decoding of quoted printable text, including UTF-8 bytes into Unicode
+        String bodyText = textHandler.toString();
+        assertContains("body 1", bodyText);
+        assertContains("body 2", bodyText);
+        assertNotContained("R0lGODlhNgE8AMQAA", bodyText); //part of encoded gif
+    }
+
+    /**
+     * Checks that quoted-printable body text is decoded, including soft line
+     * breaks, encoded trailing spaces and non-ASCII characters.
+     */
+    @Test
+    public void testQuotedPrintable() throws Exception {
+        ContentHandler handler = new BodyContentHandler();
+
+        // Close the stream after parsing and let exceptions propagate with their stack trace.
+        try (InputStream stream = getStream("test-documents/testRFC822_quoted")) {
+            EXTRACT_ALL_ALTERNATIVES_PARSER.parse(stream, handler, new Metadata(), new ParseContext());
+        }
+
+        //tests correct decoding of quoted printable text, including UTF-8 bytes into Unicode
+        String bodyText = handler.toString();
+        assertContains("D\u00FCsseldorf has non-ascii.", bodyText);
+        assertContains("Lines can be split like this.", bodyText);
+        assertContains("Spaces at the end of a line \r\nmust be encoded.\r\n", bodyText);
+        assertFalse(bodyText.contains("=")); //there should be no escape sequences
+    }
+
+    /**
+     * Checks that a base64-encoded body is decoded into text, including
+     * non-ASCII characters.
+     */
+    @Test
+    public void testBase64() throws Exception {
+        Metadata metadata = new Metadata();
+        ContentHandler handler = new BodyContentHandler();
+        ParseContext context = new ParseContext();
+        context.set(Parser.class, EXTRACT_ALL_ALTERNATIVES_PARSER);
+        //need to pass in hint.  Autodetects text/plain
+        metadata.set(Metadata.CONTENT_TYPE, "message/rfc822");
+
+        // Close the stream after parsing and let exceptions propagate with their stack trace.
+        try (InputStream stream = getStream("test-documents/testRFC822_base64")) {
+            EXTRACT_ALL_ALTERNATIVES_PARSER.parse(stream, handler, metadata, context);
+        }
+
+        //tests correct decoding of base64 text, including ISO-8859-1 bytes into Unicode
+        assertContains("Here is some text, with international characters, voil\u00E0!", handler.toString());
+    }
+
+    /**
+     * Checks decoding of internationalized headers into the creator, title
+     * and subject metadata.
+     */
+    @Test
+    public void testI18NHeaders() throws Exception {
+        Metadata metadata = new Metadata();
+        ContentHandler handler = mock(DefaultHandler.class);
+
+        // Close the stream after parsing and let exceptions propagate with their stack trace.
+        try (InputStream stream = getStream("test-documents/testRFC822_i18nheaders")) {
+            EXTRACT_ALL_ALTERNATIVES_PARSER.parse(stream, handler, metadata, new ParseContext());
+        }
+
+        //tests correct decoding of internationalized headers, both
+        //quoted-printable (Q) and Base64 (B).
+        // The full address is the one asserted for Message.MESSAGE_FROM_EMAIL on this same document.
+        assertEquals("Keld J\u00F8rn Simonsen <keld@dkuug.dk>",
+                metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("If you can read this you understand the example.",
+                metadata.get(TikaCoreProperties.TITLE));
+        assertEquals("If you can read this you understand the example.",
+                metadata.get(Metadata.SUBJECT));
+    }
+
+    /**
+     * The from isn't in the usual form.
+     * See TIKA-618
+     */
+    @Test
+    public void testUnusualFromAddress() throws Exception {
+        Metadata metadata = new Metadata();
+        ContentHandler handler = mock(DefaultHandler.class);
+
+        // try-with-resources so the test document stream is always closed
+        try (InputStream stream = getStream("test-documents/testRFC822_oddfrom")) {
+            EXTRACT_ALL_ALTERNATIVES_PARSER.parse(stream, handler, metadata, new ParseContext());
+        }
+
+        assertEquals("Saved by Windows Internet Explorer 7",
+                metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("Air Permit Programs | Air & Radiation | US EPA",
+                metadata.get(TikaCoreProperties.TITLE));
+        assertEquals("Air Permit Programs | Air & Radiation | US EPA",
+                metadata.get(Metadata.SUBJECT));
+    }
+
+    @Test
+    public void testMainBody() throws Exception {
+        //test that the first text or html chunk is processed in the main body
+        //not treated as an attachment. TIKA-2547
+        List<Metadata> metadataList = getRecursiveMetadata("testRFC822_oddfrom");
+        assertEquals(7, metadataList.size());
+        assertContains("Air Quality Planning", metadataList.get(0).get(RecursiveParserWrapperHandler.TIKA_CONTENT));
+
+        //Make sure text alternative doesn't get treated as an attachment
+        metadataList = getRecursiveMetadata("testRFC822_normal_zip");
+        assertEquals(3, metadataList.size());
+        assertContains("This is the HTML part", metadataList.get(0).get(RecursiveParserWrapperHandler.TIKA_CONTENT));
+        assertEquals("application/zip", metadataList.get(2).get(Metadata.CONTENT_TYPE));
+
+        metadataList = getRecursiveMetadata("testRFC822-txt-body");
+        assertEquals(2, metadataList.size());
+        assertContains("body 1", metadataList.get(0).get(RecursiveParserWrapperHandler.TIKA_CONTENT));
+    }
+
+    /**
+     * Test for TIKA-640, increase header max beyond 10k bytes
+     */
+    @Test
+    public void testLongHeader() throws Exception {
+        // 2000 repetitions of a fragment longer than 50 characters
+        StringBuilder longName = new StringBuilder();
+        int remaining = 2000;
+        while (remaining-- > 0) {
+            longName.append("really really really really really really long name ");
+        }
+        String name = longName.toString();
+        String rawMessage = "Status: 520\r\nFrom: " + name + "\r\n\r\n";
+        byte[] data = rawMessage.getBytes(US_ASCII);
+
+        ContentHandler handler = new DefaultHandler();
+        Metadata metadata = new Metadata();
+        ParseContext context = new ParseContext();
+
+        // Without a MimeConfig in the context the parse is expected to be rejected.
+        try {
+            EXTRACT_ALL_ALTERNATIVES_PARSER.parse(
+                    new ByteArrayInputStream(data), handler, metadata, context);
+            fail();
+        } catch (TikaException expected) {
+        }
+
+        // With the header and line length limits set to -1 the same input must parse.
+        MimeConfig.Builder limits = new MimeConfig.Builder();
+        MimeConfig config = limits.setMaxHeaderLen(-1).setMaxLineLen(-1).build();
+        context.set(MimeConfig.class, config);
+        EXTRACT_ALL_ALTERNATIVES_PARSER.parse(
+                new ByteArrayInputStream(data), handler, metadata, context);
+        assertEquals(name.trim(), metadata.get(TikaCoreProperties.CREATOR));
+    }
+
+    /**
+     * Test for TIKA-678 - not all headers may be present
+     */
+    @Test
+    public void testSomeMissingHeaders() throws Exception {
+        Metadata metadata = new Metadata();
+        ContentHandler handler = new BodyContentHandler();
+        ParseContext context = new ParseContext();
+        context.set(Parser.class, EXTRACT_ALL_ALTERNATIVES_PARSER);
+
+        // try-with-resources so the test document stream is always closed
+        try (InputStream stream = getStream("test-documents/testRFC822-limitedheaders")) {
+            EXTRACT_ALL_ALTERNATIVES_PARSER.parse(stream, handler, metadata, context);
+        }
+
+        assertTrue(metadata.isMultiValued(TikaCoreProperties.CREATOR));
+        assertEquals("xyz", metadata.getValues(TikaCoreProperties.CREATOR)[0]);
+        assertEquals("abc", metadata.getValues(TikaCoreProperties.CREATOR)[1]);
+        assertTrue(metadata.isMultiValued(Metadata.MESSAGE_FROM));
+        assertEquals("xyz", metadata.getValues(Metadata.MESSAGE_FROM)[0]);
+        assertEquals("abc", metadata.getValues(Metadata.MESSAGE_FROM)[1]);
+        assertTrue(metadata.isMultiValued(Metadata.MESSAGE_TO));
+        assertEquals("abc", metadata.getValues(Metadata.MESSAGE_TO)[0]);
+        assertEquals("def", metadata.getValues(Metadata.MESSAGE_TO)[1]);
+        assertEquals("abcd", metadata.get(TikaCoreProperties.TITLE));
+        assertEquals("abcd", metadata.get(Metadata.SUBJECT));
+        assertContains("bar biz bat", handler.toString());
+    }
+
+    /**
+     * Test TIKA-1028 - If the mail contains an encrypted attachment (or
+     * an attachment that otherwise triggers an error), parsing should carry
+     * on for the remainder regardless
+     */
+    @Test
+    public void testEncryptedZipAttachment() throws Exception {
+        Metadata metadata = new Metadata();
+        ParseContext context = new ParseContext();
+        context.set(Parser.class, EXTRACT_ALL_ALTERNATIVES_PARSER);
+        ContentHandler handler = new BodyContentHandler();
+        // try-with-resources so the test document stream is always closed
+        try (InputStream stream = getStream("test-documents/testRFC822_encrypted_zip")) {
+            EXTRACT_ALL_ALTERNATIVES_PARSER.parse(stream, handler, metadata, context);
+        }
+
+        // Check we got the metadata
+        // NOTE(review): the address in the expected value looks truncated ("ju...") - confirm against the test document
+        assertEquals("Juha Haaga <ju...@gmail.com>", metadata.get(Metadata.MESSAGE_FROM));
+        assertEquals("Test mail for Tika", metadata.get(TikaCoreProperties.TITLE));
+
+        // Check we got the message text, for both Plain Text and HTML
+        String text = handler.toString();
+        assertContains("Includes encrypted zip file", text);
+        assertContains("password is \"test\".", text);
+        assertContains("This is the Plain Text part", text);
+        assertContains("This is the HTML part", text);
+
+        // We won't get the contents of the zip file, but we will get the name
+        assertContains("text.txt", text);
+        assertNotContained("ENCRYPTED ZIP FILES", text);
+
+        // Try again, this time with the password supplied
+        // Check that we also get the zip's contents as well
+        context.set(PasswordProvider.class, new PasswordProvider() {
+            @Override
+            public String getPassword(Metadata metadata) {
+                return "test";
+            }
+        });
+        handler = new BodyContentHandler();
+        // The same Metadata object is reused for the second parse, as before.
+        try (InputStream stream = getStream("test-documents/testRFC822_encrypted_zip")) {
+            EXTRACT_ALL_ALTERNATIVES_PARSER.parse(stream, handler, metadata, context);
+        }
+
+        text = handler.toString();
+        assertContains("Includes encrypted zip file", text);
+        assertContains("password is \"test\".", text);
+        assertContains("This is the Plain Text part", text);
+        assertContains("This is the HTML part", text);
+
+        // We do get the name of the file in the encrypted zip file
+        assertContains("text.txt", text);
+
+        // TODO Upgrade to a version of Commons Compress with Encryption
+        //  support, then verify we get the contents of the text file
+        //  held within the encrypted zip
+        assumeTrue(false); // No Zip Encryption support yet
+        assertContains("TEST DATA FOR TIKA.", text);
+        assertContains("ENCRYPTED ZIP FILES", text);
+        assertContains("TIKA-1028", text);
+    }
+
+    /**
+     * Test TIKA-1028 - Ensure we can get the contents of an
+     * un-encrypted zip file
+     */
+    @Test
+    public void testNormalZipAttachment() throws Exception {
+        Metadata metadata = new Metadata();
+        ParseContext context = new ParseContext();
+        context.set(Parser.class, EXTRACT_ALL_ALTERNATIVES_PARSER);
+        ContentHandler handler = new BodyContentHandler();
+        // try-with-resources so the test document stream is always closed
+        try (InputStream stream = getStream("test-documents/testRFC822_normal_zip")) {
+            EXTRACT_ALL_ALTERNATIVES_PARSER.parse(stream, handler, metadata, context);
+        }
+
+        // Check we got the metadata
+        // NOTE(review): the addresses in the expected values look truncated ("ju...") - confirm against the test document
+        assertEquals("Juha Haaga <ju...@gmail.com>", metadata.get(Metadata.MESSAGE_FROM));
+        assertEquals("Test mail for Tika", metadata.get(TikaCoreProperties.TITLE));
+
+        // Check we got the message text, for both Plain Text and HTML
+        String text = handler.toString();
+        assertContains("Includes a normal, unencrypted zip file", text);
+        assertContains("This is the Plain Text part", text);
+        assertContains("This is the HTML part", text);
+
+        // We get both name and contents of the zip file's contents
+        assertContains("text.txt", text);
+        assertContains("TEST DATA FOR TIKA.", text);
+        assertContains("This is text inside an unencrypted zip file", text);
+        assertContains("TIKA-1028", text);
+        assertEquals("<ju...@gmail.com>", metadata.get("Message:Raw-Header:Return-Path"));
+    }
+
+    /**
+     * TIKA-1222 When requested, ensure that the various attachments of
+     * the mail come through properly as embedded resources
+     */
+    @Test
+    public void testGetAttachmentsAsEmbeddedResources() throws Exception {
+        TrackingHandler seen = new TrackingHandler();
+        ContainerExtractor extractor = new ParserContainerExtractor(TIKA_CONFIG);
+        InputStream raw = getStream("test-documents/testRFC822-multipart");
+        try (TikaInputStream input = TikaInputStream.get(raw)) {
+            assertEquals(true, extractor.isSupported(input));
+            // the extractor is also passed as its own recursion extractor
+            extractor.extract(input, extractor, seen);
+        }
+
+        // Check we found all 3 parts
+        assertEquals(3, seen.filenames.size());
+        assertEquals(3, seen.mediaTypes.size());
+
+        // No filenames available for the first two parts
+        assertEquals(null, seen.filenames.get(0));
+        assertEquals(null, seen.filenames.get(1));
+        // Except for this using Content-Disposition filename field
+        assertEquals("logo.gif", seen.filenames.get(2));
+
+        // Types are available
+        assertEquals(MediaType.TEXT_PLAIN, seen.mediaTypes.get(0));
+        assertEquals(MediaType.TEXT_HTML, seen.mediaTypes.get(1));
+        assertEquals(MediaType.image("gif"), seen.mediaTypes.get(2));
+    }
+
+    @Test
+    public void testDetection() throws Exception {
+        //test simple text file
+        XMLResult r = getXML("testRFC822_date_utf8");
+        assertEquals("message/rfc822", r.metadata.get(Metadata.CONTENT_TYPE));
+
+        //test without extension
+        r = getXML("testRFC822_eml");
+        assertEquals("message/rfc822", r.metadata.get(Metadata.CONTENT_TYPE));
+    }
+
+    @Test
+    public void testDates() throws Exception {
+        //tests non-standard dates that mime4j can't parse
+        XMLResult r = getXML("testRFC822_date_utf8");
+        assertEquals("2016-05-16T08:30:32Z", r.metadata.get(TikaCoreProperties.CREATED));
+
+        r = getXML("testRFC822_eml");
+        assertEquals("2016-05-16T08:30:32Z", r.metadata.get(TikaCoreProperties.CREATED));
+
+
+        String expected = "2016-05-15T01:32:00Z";
+
+        for (String dateString : new String[]{
+                "Sun, 15 May 2016 01:32:00 UTC", //make sure this test basically works
+                "Sun, 15 May 2016 01:32:00", //no timezone
+                "Sunday, May 15 2016 1:32 AM",
+                "May 15 2016 1:32am",
+                "May 15 2016 1:32 am",
+                "2016-05-15 01:32:00",
+                "      Sun, 15 May 2016 3:32:00 +0200",//format correctly handled by mime4j if no leading whitespace
+                "      Sun, 14 May 2016 20:32:00 EST",
+        }) {
+            testDate(dateString, expected);
+        }
+
+        //now try days without times
+        expected = "2016-05-15T12:00:00Z";
+        for (String dateString : new String[]{
+                "May 15, 2016",
+                "Sun, 15 May 2016",
+                "15 May 2016",
+        }) {
+            testDate(dateString, expected);
+        }
+    }
+
+    @Test
+    public void testTrickyDates() throws Exception {
+        DateFormat df = new SimpleDateFormat("yyyy-MM-dd", new DateFormatSymbols(Locale.US));
+        //make sure there are no mis-parses of e.g. 90 = year 90 A.D, not 1990
+        Date date1980 = df.parse("1980-01-01");
+        for (String dateString : new String[] {
+                "Mon, 29 Jan 96 14:02 GMT",
+                "7/20/95 1:12pm",
+                "08/14/2000  12:48 AM",
+                "06/24/2008, Tuesday, 11 AM",
+                "11/14/08",
+                "12/02/1996",
+                "96/12/02",
+        }) {
+            Date parsedDate = getDate(dateString);
+            if (parsedDate != null) {
+                assertTrue("date must be after 1980:"+dateString, parsedDate.getTime() > date1980.getTime());
+            }
+        }
+        //TODO: mime4j misparses these to pre 1980 dates
+        //"Wed, 27 Dec 95 11:20:40 EST",
+        //"26 Aug 00 11:14:52 EDT"
+        //
+        //We are still misparsing: 8/1/03 to a pre 1980 date
+
+    }
+
+    /**
+     * Parses {@code dateString} as a Date header and compares the formatted
+     * result with {@code expected}.
+     * NOTE(review): no time zone is set on the formatter, so the comparison
+     * presumably relies on the JVM default zone - confirm how the build sets it.
+     *
+     * @param dateString raw value for the Date header
+     * @param expected expected value in {@code yyyy-MM-dd'T'HH:mm:ss'Z'} form
+     */
+    private void testDate(String dateString, String expected) throws Exception {
+        Date parsed = getDate(dateString);
+        assertNotNull("couldn't parse " + dateString, parsed);
+        String formatted = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'",
+                new DateFormatSymbols(Locale.US)).format(parsed);
+        assertEquals("failed to match: "+dateString, expected, formatted);
+    }
+
+    /**
+     * Builds a two-header message around {@code dateString}, parses it with a
+     * fresh {@link RFC822Parser} and returns the resulting creation date.
+     *
+     * @param dateString raw value for the Date header
+     * @return the value of {@code TikaCoreProperties.CREATED} as read back from the metadata
+     */
+    private Date getDate(String dateString) throws Exception {
+        StringBuilder mail = new StringBuilder();
+        mail.append("From: dev@tika.apache.org\n");
+        mail.append("Date: ").append(dateString).append("\n");
+        byte[] rawMail = mail.toString().getBytes(StandardCharsets.UTF_8);
+
+        Parser parser = new RFC822Parser();
+        Metadata extracted = new Metadata();
+        try (InputStream input = TikaInputStream.get(rawMail)) {
+            parser.parse(input, new DefaultHandler(), extracted, new ParseContext());
+        }
+        return extracted.getDate(TikaCoreProperties.CREATED);
+    }
+
+
+    /**
+     * Checks which of two Subject headers ends up as the title.
+     */
+    @Test
+    public void testMultipleSubjects() throws Exception {
+        //adapted from govdocs1 303710.txt
+        String s = "From: Shawn Jones [chiroshawn@yahoo.com]\n" +
+                "Subject: 2006N-3502\n" +
+                "Subject: I Urge You to Require Notice of Mercury";
+        Parser p = new RFC822Parser();
+        Metadata m = new Metadata();
+        // Close the TikaInputStream after parsing, as getDate() in this class does.
+        try (InputStream is = TikaInputStream.get(s.getBytes(StandardCharsets.UTF_8))) {
+            p.parse(is, new DefaultHandler(), m, new ParseContext());
+        }
+        assertEquals("I Urge You to Require Notice of Mercury", m.get(TikaCoreProperties.TITLE));
+    }
+
+
+    /**
+     * Checks the metadata and content produced for a message with a PNG attachment.
+     */
+    @Test
+    public void testExtractAttachments() throws Exception {
+        List<Metadata> metadataList = getRecursiveMetadata("testEmailWithPNGAtt.eml",
+                EXTRACT_ALL_ALTERNATIVES_PARSER);
+        // Check the size first, so a short list fails with a clear message
+        // instead of an IndexOutOfBoundsException from the lookups below.
+        assertEquals(4, metadataList.size());
+
+        // Check we get the metadata
+        // The full address is the one asserted for Message.MESSAGE_FROM_EMAIL on this same document.
+        assertEquals("Tika Test <XXXX@apache.org>", metadataList.get(3).get(Metadata.MESSAGE_FROM));
+        assertEquals("Test Attachment Email", metadataList.get(3).get(TikaCoreProperties.TITLE));
+
+        // Check attachments
+        assertEquals("text/plain; charset=UTF-8", metadataList.get(1).get(Metadata.CONTENT_TYPE));
+        assertEquals("image/png", metadataList.get(2).get(Metadata.CONTENT_TYPE));
+        assertEquals("testPNG.png", metadataList.get(2).get(Metadata.RESOURCE_NAME_KEY));
+        assertContains("This email has a PNG attachment included in it",
+                metadataList.get(1).get(RecursiveParserWrapperHandler.TIKA_CONTENT));
+        assertEquals(null, metadataList.get(1).get(Metadata.CONTENT_DISPOSITION));
+        assertEquals("attachment; filename=\"testPNG.png\"", metadataList.get(2).get(Metadata.CONTENT_DISPOSITION));
+    }
+
+    @Test
+    public void testEmbeddedMetadata() throws Exception {
+        List<Metadata> seenMetadata = getRecursiveMetadata("testRFC822-multipart",
+                EXTRACT_ALL_ALTERNATIVES_PARSER);
+
+        assertEquals(4, seenMetadata.size());
+        assertEquals(null, seenMetadata.get(1).get(Metadata.CONTENT_DISPOSITION));
+        assertEquals("text/plain; charset=UTF-8", seenMetadata.get(1).get(Metadata.CONTENT_TYPE));
+        assertEquals("UTF-8", seenMetadata.get(1).get(Metadata.CONTENT_ENCODING));
+        assertEquals(null, seenMetadata.get(2).get(Metadata.CONTENT_DISPOSITION));
+        assertEquals("text/html; charset=UTF-8", seenMetadata.get(2).get(Metadata.CONTENT_TYPE));
+        assertEquals("UTF-8", seenMetadata.get(2).get(Metadata.CONTENT_ENCODING));
+        assertEquals("attachment; filename=\"logo.gif\"", seenMetadata.get(3).get(Metadata.CONTENT_DISPOSITION));
+        assertEquals("logo.gif", seenMetadata.get(3).get(Metadata.RESOURCE_NAME_KEY));
+        assertEquals("image/gif", seenMetadata.get(3).get(Metadata.CONTENT_TYPE));
+    }
+
+    /**
+     * Checks the {@code Message.MULTIPART_SUBTYPE} and
+     * {@code Message.MULTIPART_BOUNDARY} values reported for the root message
+     * and for each embedded part of {@code testRFC822-multipart} when it is
+     * parsed with {@code EXTRACT_ALL_ALTERNATIVES_PARSER}.
+     * <p>
+     * All comparisons use {@code assertEquals(expected, actual)} so that a
+     * missing (null) value is reported as an assertion failure showing the
+     * expected and actual values, instead of a {@code NullPointerException}
+     * or an uninformative "expected true" failure.
+     */
+    @Test
+    public void testMultipartFlags() throws Exception {
+        List<Metadata> metadataList = getRecursiveMetadata("testRFC822-multipart", EXTRACT_ALL_ALTERNATIVES_PARSER);
+        // Check the root metadata.
+        assertEquals("mixed", metadataList.get(0).get(Message.MULTIPART_SUBTYPE));
+        assertEquals("0016e64606800312ee04913db790", metadataList.get(0).get(Message.MULTIPART_BOUNDARY));
+
+        // Check the metadata of the first alternative.
+        assertEquals("text/plain; charset=UTF-8", metadataList.get(1).get(Metadata.CONTENT_TYPE));
+        assertEquals("alternative", metadataList.get(1).get(Message.MULTIPART_SUBTYPE));
+        assertEquals("0016e64606800312ea04913db78e", metadataList.get(1).get(Message.MULTIPART_BOUNDARY));
+
+        // Check the metadata of the second alternative.
+        assertEquals("text/html; charset=UTF-8", metadataList.get(2).get(Metadata.CONTENT_TYPE));
+        assertEquals("alternative", metadataList.get(2).get(Message.MULTIPART_SUBTYPE));
+        assertEquals("0016e64606800312ea04913db78e", metadataList.get(2).get(Message.MULTIPART_BOUNDARY));
+
+        // Check the metadata of the attached GIF.
+        assertEquals("image/gif", metadataList.get(3).get(Metadata.CONTENT_TYPE));
+        assertEquals("mixed", metadataList.get(3).get(Message.MULTIPART_SUBTYPE));
+        assertEquals("0016e64606800312ee04913db790", metadataList.get(3).get(Message.MULTIPART_BOUNDARY));
+    }
+
+    /**
+     * Parses {@code testRFC822-multipart} with the default recursive setup and
+     * checks that only two metadata objects come back: the message itself,
+     * whose content contains "body 2" but not "body 1", and the GIF.
+     * <p>
+     * Structure of the test file:
+     * <pre>
+     *   multipart/mixed
+     *       multipart/alternative
+     *           text
+     *           html
+     *       gif
+     * </pre>
+     */
+    @Test
+    public void testBasicAlternativeBodyHandling() throws Exception {
+        List<Metadata> metadataList = getRecursiveMetadata("testRFC822-multipart");
+        assertEquals(2, metadataList.size());
+
+        // the container email
+        Metadata email = metadataList.get(0);
+        String emailContent = email.get(RecursiveParserWrapperHandler.TIKA_CONTENT);
+        assertContains("body 2", emailContent);
+        assertNotContained("body 1", emailContent);
+        assertEquals("message/rfc822", email.get(Metadata.CONTENT_TYPE));
+
+        // the embedded image
+        Metadata gif = metadataList.get(1);
+        assertEquals("image/gif", gif.get(Metadata.CONTENT_TYPE));
+        assertEquals("/logo.gif", gif.get(RecursiveParserWrapperHandler.EMBEDDED_RESOURCE_PATH));
+    }
+
+    @Test
+    public void testMixedRelatedMultipart() throws Exception {
+        /*
+            multipart/mixed (..6)
+                multipart/related (..5)
+                    multipart/alternative  (..4)
+                        text/plain
+                        text/html
+                image/jpeg (inline) Mary with cooler.jpeg  (..5)
+            image/jpeg (attachment) mary-coffee.jpg (..6)
+
+         */
+
+        List<Metadata> metadataList = getRecursiveMetadata("testRFC822-mixed-simple");
+        assertEquals(3, metadataList.size());
+
+        assertContains("body 2", metadataList.get(0).get(RecursiveParserWrapperHandler.TIKA_CONTENT));
+        assertNotContained("body 1", metadataList.get(0).get(RecursiveParserWrapperHandler.TIKA_CONTENT));
+        assertEquals("message/rfc822", metadataList.get(0).get(Metadata.CONTENT_TYPE));
+
+        assertEquals("image/jpeg", metadataList.get(1).get(Metadata.CONTENT_TYPE));
+        assertEquals("/Mary with cooler.jpeg",
+                metadataList.get(1).get(RecursiveParserWrapperHandler.EMBEDDED_RESOURCE_PATH));
+        assertEquals(TikaCoreProperties.EmbeddedResourceType.INLINE.toString(),
+                metadataList.get(1).get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE));
+
+        assertEquals("image/jpeg", metadataList.get(2).get(Metadata.CONTENT_TYPE));
+        assertEquals("/mary-coffee.jpg",
+                metadataList.get(2).get(RecursiveParserWrapperHandler.EMBEDDED_RESOURCE_PATH));
+        assertEquals(TikaCoreProperties.EmbeddedResourceType.ATTACHMENT.toString(),
+                metadataList.get(2).get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE));
+    }
+
+    /**
+     * Tests that both html body chunks are stitched back together in the body
+     * text of the main email: the content of the first metadata object must
+     * contain "body 2" and "body 3" but not "body 1", and the only other
+     * metadata object is the PDF.
+     * <p>
+     * Structure of the test file:
+     * <pre>
+     *   multipart/alternative
+     *       text
+     *       multipart/mixed
+     *           html body chunk 1
+     *           pdf
+     *           html body chunk 2
+     * </pre>
+     */
+    @Test
+    public void testAlternativeWithComplexMixedChild() throws Exception {
+        List<Metadata> metadataList = getRecursiveMetadata("testRFC822-mixed-with-pdf-inline");
+        assertEquals(2, metadataList.size());
+
+        // the container email
+        Metadata email = metadataList.get(0);
+        String emailContent = email.get(RecursiveParserWrapperHandler.TIKA_CONTENT);
+        assertContains("body 2", emailContent);
+        assertContains("body 3", emailContent);
+        assertNotContained("body 1", emailContent);
+        assertEquals("message/rfc822", email.get(Metadata.CONTENT_TYPE));
+
+        // the embedded PDF
+        Metadata pdf = metadataList.get(1);
+        assertEquals("application/pdf", pdf.get(Metadata.CONTENT_TYPE));
+        assertEquals("/tzora-titan-4-hummer-xl-manual.pdf",
+                pdf.get(RecursiveParserWrapperHandler.EMBEDDED_RESOURCE_PATH));
+    }
+
+    @Test
+    public void testSimpleBodyInlined() throws Exception {
+        List<Metadata> metadataList = getRecursiveMetadata("testRFC822_simple_inline_body.txt");
+        assertEquals(1, metadataList.size());
+        assertContains("asked", metadataList.get(0).get(RecursiveParserWrapperHandler.TIKA_CONTENT));
+    }
+
+    @Test
+    public void testGroupwise() throws Exception {
+        //TODO -- this should treat attachments as attachments, no?
+        List<Metadata>  metadataList = getRecursiveMetadata("testGroupWiseEml.eml");
+        assertEquals(1, metadataList.size());
+        assertContains("ssssss", metadataList.get(0).get(RecursiveParserWrapperHandler.TIKA_CONTENT));
+    }
+}