You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/06/29 11:11:09 UTC
[03/39] tika git commit: Convert new lines from windows to unix

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/html/IdentityHtmlMapper.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/html/IdentityHtmlMapper.java b/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/html/IdentityHtmlMapper.java
index 45f0388..da046aa 100644
--- a/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/html/IdentityHtmlMapper.java
+++ b/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/html/IdentityHtmlMapper.java
@@ -1,43 +1,43 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.html;
-
-import java.util.Locale;
-
-/**
- * Alternative HTML mapping rules that pass the input HTML as-is without any
- * modifications.
- *
- * @since Apache Tika 0.8
- */
-public class IdentityHtmlMapper implements HtmlMapper {
-
-    public static final HtmlMapper INSTANCE = new IdentityHtmlMapper();
-
-    public boolean isDiscardElement(String name) {
-        return false;
-    }
-
-    public String mapSafeAttribute(String elementName, String attributeName) {
-        return attributeName.toLowerCase(Locale.ENGLISH);
-    }
-
-    public String mapSafeElement(String name) {
-        return name.toLowerCase(Locale.ENGLISH);
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.html;
+
+import java.util.Locale;
+
+/**
+ * Alternative HTML mapping rules that pass the input HTML as-is without any
+ * modifications.
+ *
+ * @since Apache Tika 0.8
+ */
+public class IdentityHtmlMapper implements HtmlMapper {
+
+    public static final HtmlMapper INSTANCE = new IdentityHtmlMapper();
+
+    public boolean isDiscardElement(String name) {
+        return false;
+    }
+
+    public String mapSafeAttribute(String elementName, String attributeName) {
+        return attributeName.toLowerCase(Locale.ENGLISH);
+    }
+
+    public String mapSafeElement(String name) {
+        return name.toLowerCase(Locale.ENGLISH);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/html/XHTMLDowngradeHandler.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/html/XHTMLDowngradeHandler.java b/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/html/XHTMLDowngradeHandler.java
index 336ae75..221a87a 100644
--- a/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/html/XHTMLDowngradeHandler.java
+++ b/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/html/XHTMLDowngradeHandler.java
@@ -1,78 +1,78 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.html;
-
-import javax.xml.XMLConstants;
-import java.util.Locale;
-
-import org.apache.tika.sax.ContentHandlerDecorator;
-import org.xml.sax.Attributes;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-import org.xml.sax.helpers.AttributesImpl;
-
-/**
- * Content handler decorator that downgrades XHTML elements to
- * old-style HTML elements before passing them on to the decorated
- * content handler. This downgrading consists of dropping all namespaces
- * (and namespaced attributes) and uppercasing all element names.
- * Used by the {@link HtmlParser} to make all incoming HTML look the same.
- */
-class XHTMLDowngradeHandler extends ContentHandlerDecorator {
-
-    public XHTMLDowngradeHandler(ContentHandler handler) {
-        super(handler);
-    }
-
-    @Override
-    public void startElement(
-            String uri, String localName, String name, Attributes atts)
-            throws SAXException {
-        String upper = localName.toUpperCase(Locale.ENGLISH);
-
-        AttributesImpl attributes = new AttributesImpl();
-        for (int i = 0; i < atts.getLength(); i++) {
-            String auri = atts.getURI(i);
-            String local = atts.getLocalName(i);
-            String qname = atts.getQName(i);
-            if (XMLConstants.NULL_NS_URI.equals(auri)
-                    && !local.equals(XMLConstants.XMLNS_ATTRIBUTE)
-                    && !qname.startsWith(XMLConstants.XMLNS_ATTRIBUTE + ":")) {
-                attributes.addAttribute(
-                        auri, local, qname, atts.getType(i), atts.getValue(i));
-            }
-        }
-
-        super.startElement(XMLConstants.NULL_NS_URI, upper, upper, attributes);
-    }
-
-    @Override
-    public void endElement(String uri, String localName, String name)
-            throws SAXException {
-        String upper = localName.toUpperCase(Locale.ENGLISH);
-        super.endElement(XMLConstants.NULL_NS_URI, upper, upper);
-    }
-
-    @Override
-    public void startPrefixMapping(String prefix, String uri) {
-    }
-
-    @Override
-    public void endPrefixMapping(String prefix) {
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.html;
+
+import javax.xml.XMLConstants;
+import java.util.Locale;
+
+import org.apache.tika.sax.ContentHandlerDecorator;
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.AttributesImpl;
+
+/**
+ * Content handler decorator that downgrades XHTML elements to
+ * old-style HTML elements before passing them on to the decorated
+ * content handler. This downgrading consists of dropping all namespaces
+ * (and namespaced attributes) and uppercasing all element names.
+ * Used by the {@link HtmlParser} to make all incoming HTML look the same.
+ */
+class XHTMLDowngradeHandler extends ContentHandlerDecorator {
+
+    public XHTMLDowngradeHandler(ContentHandler handler) {
+        super(handler);
+    }
+
+    @Override
+    public void startElement(
+            String uri, String localName, String name, Attributes atts)
+            throws SAXException {
+        String upper = localName.toUpperCase(Locale.ENGLISH);
+
+        AttributesImpl attributes = new AttributesImpl();
+        for (int i = 0; i < atts.getLength(); i++) {
+            String auri = atts.getURI(i);
+            String local = atts.getLocalName(i);
+            String qname = atts.getQName(i);
+            if (XMLConstants.NULL_NS_URI.equals(auri)
+                    && !local.equals(XMLConstants.XMLNS_ATTRIBUTE)
+                    && !qname.startsWith(XMLConstants.XMLNS_ATTRIBUTE + ":")) {
+                attributes.addAttribute(
+                        auri, local, qname, atts.getType(i), atts.getValue(i));
+            }
+        }
+
+        super.startElement(XMLConstants.NULL_NS_URI, upper, upper, attributes);
+    }
+
+    @Override
+    public void endElement(String uri, String localName, String name)
+            throws SAXException {
+        String upper = localName.toUpperCase(Locale.ENGLISH);
+        super.endElement(XMLConstants.NULL_NS_URI, upper, upper);
+    }
+
+    @Override
+    public void startPrefixMapping(String prefix, String uri) {
+    }
+
+    @Override
+    public void endPrefixMapping(String prefix) {
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java b/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
index 9740eff..2c8942e 100644
--- a/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
+++ b/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
@@ -1,376 +1,376 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.mail;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.text.DateFormat;
-import java.text.DateFormatSymbols;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.Locale;
-import java.util.TimeZone;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.james.mime4j.MimeException;
-import org.apache.james.mime4j.codec.DecodeMonitor;
-import org.apache.james.mime4j.codec.DecoderUtil;
-import org.apache.james.mime4j.dom.address.Address;
-import org.apache.james.mime4j.dom.address.AddressList;
-import org.apache.james.mime4j.dom.address.Mailbox;
-import org.apache.james.mime4j.dom.address.MailboxList;
-import org.apache.james.mime4j.dom.field.AddressListField;
-import org.apache.james.mime4j.dom.field.DateTimeField;
-import org.apache.james.mime4j.dom.field.MailboxListField;
-import org.apache.james.mime4j.dom.field.ParsedField;
-import org.apache.james.mime4j.dom.field.UnstructuredField;
-import org.apache.james.mime4j.field.LenientFieldParser;
-import org.apache.james.mime4j.parser.ContentHandler;
-import org.apache.james.mime4j.stream.BodyDescriptor;
-import org.apache.james.mime4j.stream.Field;
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.extractor.EmbeddedDocumentExtractor;
-import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.xml.sax.SAXException;
-
-import static org.apache.tika.utils.DateUtils.MIDDAY;
-import static org.apache.tika.utils.DateUtils.UTC;
-
-/**
- * Bridge between mime4j's content handler and the generic Sax content handler
- * used by Tika. See
- * http://james.apache.org/mime4j/apidocs/org/apache/james/mime4j/parser/ContentHandler.html
- */
-class MailContentHandler implements ContentHandler {
-
-    //TIKA-1970 Mac Mail's format
-    private static final Pattern GENERAL_TIME_ZONE_NO_MINUTES_PATTERN =
-            Pattern.compile("(?:UTC|GMT)([+-])(\\d?\\d)\\Z");
-
-    //find a time ending in am/pm without a space: 10:30am and
-    //use this pattern to insert space: 10:30 am
-    private static final Pattern AM_PM = Pattern.compile("(?i)(\\d)([ap]m)\\b");
-
-    private static final DateFormat[] ALTERNATE_DATE_FORMATS = new DateFormat[] {
-            //note that the string is "cleaned" before processing:
-            //1) condense multiple whitespace to single space
-            //2) trim()
-            //3) strip out commas
-            //4) insert space before am/pm
-
-            //May 16 2016 1:32am
-            createDateFormat("MMM dd yy hh:mm a", null),
-
-            //this is a standard pattern handled by mime4j;
-            //but mime4j fails with leading whitespace
-            createDateFormat("EEE d MMM yy HH:mm:ss Z", UTC),
-
-            createDateFormat("EEE d MMM yy HH:mm:ss z", UTC),
-
-            createDateFormat("EEE d MMM yy HH:mm:ss", null),// no timezone
-
-            createDateFormat("EEEEE MMM d yy hh:mm a", null),// Sunday, May 15 2016 1:32 PM
-
-            //16 May 2016 at 09:30:32  GMT+1 (Mac Mail TIKA-1970)
-            createDateFormat("d MMM yy 'at' HH:mm:ss z", UTC),   // UTC/Zulu
-
-            createDateFormat("yy-MM-dd HH:mm:ss", null),
-
-            createDateFormat("MM/dd/yy hh:mm a", null, false),
-
-            //now dates without times
-            createDateFormat("MMM d yy", MIDDAY, false),
-            createDateFormat("EEE d MMM yy", MIDDAY, false),
-            createDateFormat("d MMM yy", MIDDAY, false),
-            createDateFormat("yy/MM/dd", MIDDAY, false),
-            createDateFormat("MM/dd/yy", MIDDAY, false)
-    };
-
-    private static DateFormat createDateFormat(String format, TimeZone timezone) {
-        return createDateFormat(format, timezone, true);
-    }
-
-    private static DateFormat createDateFormat(String format, TimeZone timezone, boolean isLenient) {
-        SimpleDateFormat sdf =
-                new SimpleDateFormat(format, new DateFormatSymbols(Locale.US));
-        if (timezone != null) {
-            sdf.setTimeZone(timezone);
-        }
-        sdf.setLenient(isLenient);
-        return sdf;
-    }
-
-    private boolean strictParsing = false;
-
-    private XHTMLContentHandler handler;
-    private Metadata metadata;
-    private EmbeddedDocumentExtractor extractor;
-
-    private boolean inPart = false;
-
-    MailContentHandler(XHTMLContentHandler xhtml, Metadata metadata, ParseContext context, boolean strictParsing) {
-        this.handler = xhtml;
-        this.metadata = metadata;
-        this.strictParsing = strictParsing;
-
-        // Fetch / Build an EmbeddedDocumentExtractor with which
-        //  to handle/process the parts/attachments
-
-        // Was an EmbeddedDocumentExtractor explicitly supplied?
-        this.extractor = context.get(EmbeddedDocumentExtractor.class);
-
-        // If there's no EmbeddedDocumentExtractor, then try using a normal parser
-        // This will ensure that the contents are made available to the user, so
-        //  the see the text, but without fine-grained control/extraction
-        // (This also maintains backward compatibility with older versions!)
-        if (this.extractor == null) {
-            // If the user gave a parser, use that, if not the default
-            Parser parser = context.get(AutoDetectParser.class);
-            if (parser == null) {
-                parser = context.get(Parser.class);
-            }
-            if (parser == null) {
-                TikaConfig tikaConfig = context.get(TikaConfig.class);
-                if (tikaConfig == null) {
-                    tikaConfig = TikaConfig.getDefaultConfig();
-                }
-                parser = new AutoDetectParser(tikaConfig.getParser());
-            }
-            ParseContext ctx = new ParseContext();
-            ctx.set(Parser.class, parser);
-            extractor = new ParsingEmbeddedDocumentExtractor(ctx);
-        }
-    }
-
-    public void body(BodyDescriptor body, InputStream is) throws MimeException,
-            IOException {
-        // use a different metadata object
-        // in order to specify the mime type of the
-        // sub part without damaging the main metadata
-
-        Metadata submd = new Metadata();
-        submd.set(Metadata.CONTENT_TYPE, body.getMimeType());
-        submd.set(Metadata.CONTENT_ENCODING, body.getCharset());
-
-        try {
-            if (extractor.shouldParseEmbedded(submd)) {
-                extractor.parseEmbedded(is, handler, submd, false);
-            }
-        } catch (SAXException e) {
-            throw new MimeException(e);
-        }
-    }
-
-    public void endBodyPart() throws MimeException {
-        try {
-            handler.endElement("p");
-            handler.endElement("div");
-        } catch (SAXException e) {
-            throw new MimeException(e);
-        }
-    }
-
-    public void endHeader() throws MimeException {
-    }
-
-    public void startMessage() throws MimeException {
-        try {
-            handler.startDocument();
-        } catch (SAXException e) {
-            throw new MimeException(e);
-        }
-    }
-
-    public void endMessage() throws MimeException {
-        try {
-            handler.endDocument();
-        } catch (SAXException e) {
-            throw new MimeException(e);
-        }
-    }
-
-    public void endMultipart() throws MimeException {
-        inPart = false;
-    }
-
-    public void epilogue(InputStream is) throws MimeException, IOException {
-    }
-
-    /**
-     * Header for the whole message or its parts
-     *
-     * @see <a href="http://james.apache.org/mime4j/apidocs/org/apache/james/mime4j/parser/">
-     *     http://james.apache.org/mime4j/apidocs/org/apache/james/mime4j/parser/</a>
-     * Field.html
-     */
-    public void field(Field field) throws MimeException {
-        // inPart indicates whether these metadata correspond to the
-        // whole message or its parts
-        if (inPart) {
-            return;
-        }
-
-        try {
-            String fieldname = field.getName();
-            ParsedField parsedField = LenientFieldParser.getParser().parse(
-                    field, DecodeMonitor.SILENT);
-            if (fieldname.equalsIgnoreCase("From")) {
-                MailboxListField fromField = (MailboxListField) parsedField;
-                MailboxList mailboxList = fromField.getMailboxList();
-                if (fromField.isValidField() && mailboxList != null) {
-                    for (Address address : mailboxList) {
-                        String from = getDisplayString(address);
-                        metadata.add(Metadata.MESSAGE_FROM, from);
-                        metadata.add(TikaCoreProperties.CREATOR, from);
-                    }
-                } else {
-                    String from = stripOutFieldPrefix(field, "From:");
-                    if (from.startsWith("<")) {
-                        from = from.substring(1);
-                    }
-                    if (from.endsWith(">")) {
-                        from = from.substring(0, from.length() - 1);
-                    }
-                    metadata.add(Metadata.MESSAGE_FROM, from);
-                    metadata.add(TikaCoreProperties.CREATOR, from);
-                }
-            } else if (fieldname.equalsIgnoreCase("Subject")) {
-                metadata.set(TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_TITLE,
-                        ((UnstructuredField) parsedField).getValue());
-            } else if (fieldname.equalsIgnoreCase("To")) {
-                processAddressList(parsedField, "To:", Metadata.MESSAGE_TO);
-            } else if (fieldname.equalsIgnoreCase("CC")) {
-                processAddressList(parsedField, "Cc:", Metadata.MESSAGE_CC);
-            } else if (fieldname.equalsIgnoreCase("BCC")) {
-                processAddressList(parsedField, "Bcc:", Metadata.MESSAGE_BCC);
-            } else if (fieldname.equalsIgnoreCase("Date")) {
-                DateTimeField dateField = (DateTimeField) parsedField;
-                Date date = dateField.getDate();
-                if (date == null) {
-                    date = tryOtherDateFormats(field.getBody());
-                }
-                metadata.set(TikaCoreProperties.CREATED, date);
-            }
-        } catch (RuntimeException me) {
-            if (strictParsing) {
-                throw me;
-            }
-        }
-    }
-
-    private static synchronized Date tryOtherDateFormats(String text) {
-        if (text == null) {
-            return null;
-        }
-        //strip out additional spaces and trim
-        text = text.replaceAll("\\s+", " ").trim();
-
-        //strip out commas
-        text = text.replaceAll(",", "");
-        Matcher matcher = GENERAL_TIME_ZONE_NO_MINUTES_PATTERN.matcher(text);
-        if (matcher.find()) {
-            text = matcher.replaceFirst("GMT$1$2:00");
-        }
-
-        matcher = AM_PM.matcher(text);
-        if (matcher.find()) {
-            text = matcher.replaceFirst("$1 $2");
-        }
-
-        for (DateFormat format : ALTERNATE_DATE_FORMATS) {
-            try {
-                return format.parse(text);
-            } catch (ParseException e) {
-            }
-        }
-        return null;
-    }
-
-    private void processAddressList(ParsedField field, String addressListType,
-                                    String metadataField) throws MimeException {
-        AddressListField toField = (AddressListField) field;
-        if (toField.isValidField()) {
-            AddressList addressList = toField.getAddressList();
-            for (Address address : addressList) {
-                metadata.add(metadataField, getDisplayString(address));
-            }
-        } else {
-            String to = stripOutFieldPrefix(field,
-                    addressListType);
-            for (String eachTo : to.split(",")) {
-                metadata.add(metadataField, eachTo.trim());
-            }
-        }
-    }
-
-    private String getDisplayString(Address address) {
-        if (address instanceof Mailbox) {
-            Mailbox mailbox = (Mailbox) address;
-            String name = mailbox.getName();
-            if (name != null && name.length() > 0) {
-                name = DecoderUtil.decodeEncodedWords(name, DecodeMonitor.SILENT);
-                return name + " <" + mailbox.getAddress() + ">";
-            } else {
-                return mailbox.getAddress();
-            }
-        } else {
-            return address.toString();
-        }
-    }
-
-    public void preamble(InputStream is) throws MimeException, IOException {
-    }
-
-    public void raw(InputStream is) throws MimeException, IOException {
-    }
-
-    public void startBodyPart() throws MimeException {
-        try {
-            handler.startElement("div", "class", "email-entry");
-            handler.startElement("p");
-        } catch (SAXException e) {
-            throw new MimeException(e);
-        }
-    }
-
-    public void startHeader() throws MimeException {
-        // TODO Auto-generated method stub
-
-    }
-
-    public void startMultipart(BodyDescriptor descr) throws MimeException {
-        inPart = true;
-    }
-
-    private String stripOutFieldPrefix(Field field, String fieldname) {
-        String temp = field.getRaw().toString();
-        int loc = fieldname.length();
-        while (temp.charAt(loc) == ' ') {
-            loc++;
-        }
-        return temp.substring(loc);
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mail;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.text.DateFormat;
+import java.text.DateFormatSymbols;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Locale;
+import java.util.TimeZone;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.james.mime4j.MimeException;
+import org.apache.james.mime4j.codec.DecodeMonitor;
+import org.apache.james.mime4j.codec.DecoderUtil;
+import org.apache.james.mime4j.dom.address.Address;
+import org.apache.james.mime4j.dom.address.AddressList;
+import org.apache.james.mime4j.dom.address.Mailbox;
+import org.apache.james.mime4j.dom.address.MailboxList;
+import org.apache.james.mime4j.dom.field.AddressListField;
+import org.apache.james.mime4j.dom.field.DateTimeField;
+import org.apache.james.mime4j.dom.field.MailboxListField;
+import org.apache.james.mime4j.dom.field.ParsedField;
+import org.apache.james.mime4j.dom.field.UnstructuredField;
+import org.apache.james.mime4j.field.LenientFieldParser;
+import org.apache.james.mime4j.parser.ContentHandler;
+import org.apache.james.mime4j.stream.BodyDescriptor;
+import org.apache.james.mime4j.stream.Field;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.SAXException;
+
+import static org.apache.tika.utils.DateUtils.MIDDAY;
+import static org.apache.tika.utils.DateUtils.UTC;
+
+/**
+ * Bridge between mime4j's content handler and the generic Sax content handler
+ * used by Tika. See
+ * http://james.apache.org/mime4j/apidocs/org/apache/james/mime4j/parser/ContentHandler.html
+ */
+class MailContentHandler implements ContentHandler {
+
+    //TIKA-1970 Mac Mail's format
+    private static final Pattern GENERAL_TIME_ZONE_NO_MINUTES_PATTERN =
+            Pattern.compile("(?:UTC|GMT)([+-])(\\d?\\d)\\Z");
+
+    //find a time ending in am/pm without a space: 10:30am and
+    //use this pattern to insert space: 10:30 am
+    private static final Pattern AM_PM = Pattern.compile("(?i)(\\d)([ap]m)\\b");
+
+    private static final DateFormat[] ALTERNATE_DATE_FORMATS = new DateFormat[] {
+            //note that the string is "cleaned" before processing:
+            //1) condense multiple whitespace to single space
+            //2) trim()
+            //3) strip out commas
+            //4) insert space before am/pm
+
+            //May 16 2016 1:32am
+            createDateFormat("MMM dd yy hh:mm a", null),
+
+            //this is a standard pattern handled by mime4j;
+            //but mime4j fails with leading whitespace
+            createDateFormat("EEE d MMM yy HH:mm:ss Z", UTC),
+
+            createDateFormat("EEE d MMM yy HH:mm:ss z", UTC),
+
+            createDateFormat("EEE d MMM yy HH:mm:ss", null),// no timezone
+
+            createDateFormat("EEEEE MMM d yy hh:mm a", null),// Sunday, May 15 2016 1:32 PM
+
+            //16 May 2016 at 09:30:32  GMT+1 (Mac Mail TIKA-1970)
+            createDateFormat("d MMM yy 'at' HH:mm:ss z", UTC),   // UTC/Zulu
+
+            createDateFormat("yy-MM-dd HH:mm:ss", null),
+
+            createDateFormat("MM/dd/yy hh:mm a", null, false),
+
+            //now dates without times
+            createDateFormat("MMM d yy", MIDDAY, false),
+            createDateFormat("EEE d MMM yy", MIDDAY, false),
+            createDateFormat("d MMM yy", MIDDAY, false),
+            createDateFormat("yy/MM/dd", MIDDAY, false),
+            createDateFormat("MM/dd/yy", MIDDAY, false)
+    };
+
+    private static DateFormat createDateFormat(String format, TimeZone timezone) {
+        return createDateFormat(format, timezone, true);
+    }
+
+    private static DateFormat createDateFormat(String format, TimeZone timezone, boolean isLenient) {
+        SimpleDateFormat sdf =
+                new SimpleDateFormat(format, new DateFormatSymbols(Locale.US));
+        if (timezone != null) {
+            sdf.setTimeZone(timezone);
+        }
+        sdf.setLenient(isLenient);
+        return sdf;
+    }
+
+    private boolean strictParsing = false;
+
+    private XHTMLContentHandler handler;
+    private Metadata metadata;
+    private EmbeddedDocumentExtractor extractor;
+
+    private boolean inPart = false;
+
+    MailContentHandler(XHTMLContentHandler xhtml, Metadata metadata, ParseContext context, boolean strictParsing) {
+        this.handler = xhtml;
+        this.metadata = metadata;
+        this.strictParsing = strictParsing;
+
+        // Fetch / Build an EmbeddedDocumentExtractor with which
+        //  to handle/process the parts/attachments
+
+        // Was an EmbeddedDocumentExtractor explicitly supplied?
+        this.extractor = context.get(EmbeddedDocumentExtractor.class);
+
+        // If there's no EmbeddedDocumentExtractor, then try using a normal parser
+        // This will ensure that the contents are made available to the user, so
+        //  the see the text, but without fine-grained control/extraction
+        // (This also maintains backward compatibility with older versions!)
+        if (this.extractor == null) {
+            // If the user gave a parser, use that, if not the default
+            Parser parser = context.get(AutoDetectParser.class);
+            if (parser == null) {
+                parser = context.get(Parser.class);
+            }
+            if (parser == null) {
+                TikaConfig tikaConfig = context.get(TikaConfig.class);
+                if (tikaConfig == null) {
+                    tikaConfig = TikaConfig.getDefaultConfig();
+                }
+                parser = new AutoDetectParser(tikaConfig.getParser());
+            }
+            ParseContext ctx = new ParseContext();
+            ctx.set(Parser.class, parser);
+            extractor = new ParsingEmbeddedDocumentExtractor(ctx);
+        }
+    }
+
+    public void body(BodyDescriptor body, InputStream is) throws MimeException,
+            IOException {
+        // use a different metadata object
+        // in order to specify the mime type of the
+        // sub part without damaging the main metadata
+
+        Metadata submd = new Metadata();
+        submd.set(Metadata.CONTENT_TYPE, body.getMimeType());
+        submd.set(Metadata.CONTENT_ENCODING, body.getCharset());
+
+        try {
+            if (extractor.shouldParseEmbedded(submd)) {
+                extractor.parseEmbedded(is, handler, submd, false);
+            }
+        } catch (SAXException e) {
+            throw new MimeException(e);
+        }
+    }
+
+    public void endBodyPart() throws MimeException {
+        try {
+            handler.endElement("p");
+            handler.endElement("div");
+        } catch (SAXException e) {
+            throw new MimeException(e);
+        }
+    }
+
+    public void endHeader() throws MimeException {
+    }
+
+    public void startMessage() throws MimeException {
+        try {
+            handler.startDocument();
+        } catch (SAXException e) {
+            throw new MimeException(e);
+        }
+    }
+
+    public void endMessage() throws MimeException {
+        try {
+            handler.endDocument();
+        } catch (SAXException e) {
+            throw new MimeException(e);
+        }
+    }
+
+    public void endMultipart() throws MimeException {
+        inPart = false;
+    }
+
+    public void epilogue(InputStream is) throws MimeException, IOException {
+    }
+
+    /**
+     * Header for the whole message or its parts
+     *
+     * @see <a href="http://james.apache.org/mime4j/apidocs/org/apache/james/mime4j/parser/">
+     *     http://james.apache.org/mime4j/apidocs/org/apache/james/mime4j/parser/</a>
+     * Field.html
+     */
+    public void field(Field field) throws MimeException {
+        // inPart indicates whether these metadata correspond to the
+        // whole message or its parts
+        if (inPart) {
+            return;
+        }
+
+        try {
+            String fieldname = field.getName();
+            ParsedField parsedField = LenientFieldParser.getParser().parse(
+                    field, DecodeMonitor.SILENT);
+            if (fieldname.equalsIgnoreCase("From")) {
+                MailboxListField fromField = (MailboxListField) parsedField;
+                MailboxList mailboxList = fromField.getMailboxList();
+                if (fromField.isValidField() && mailboxList != null) {
+                    for (Address address : mailboxList) {
+                        String from = getDisplayString(address);
+                        metadata.add(Metadata.MESSAGE_FROM, from);
+                        metadata.add(TikaCoreProperties.CREATOR, from);
+                    }
+                } else {
+                    String from = stripOutFieldPrefix(field, "From:");
+                    if (from.startsWith("<")) {
+                        from = from.substring(1);
+                    }
+                    if (from.endsWith(">")) {
+                        from = from.substring(0, from.length() - 1);
+                    }
+                    metadata.add(Metadata.MESSAGE_FROM, from);
+                    metadata.add(TikaCoreProperties.CREATOR, from);
+                }
+            } else if (fieldname.equalsIgnoreCase("Subject")) {
+                metadata.set(TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_TITLE,
+                        ((UnstructuredField) parsedField).getValue());
+            } else if (fieldname.equalsIgnoreCase("To")) {
+                processAddressList(parsedField, "To:", Metadata.MESSAGE_TO);
+            } else if (fieldname.equalsIgnoreCase("CC")) {
+                processAddressList(parsedField, "Cc:", Metadata.MESSAGE_CC);
+            } else if (fieldname.equalsIgnoreCase("BCC")) {
+                processAddressList(parsedField, "Bcc:", Metadata.MESSAGE_BCC);
+            } else if (fieldname.equalsIgnoreCase("Date")) {
+                DateTimeField dateField = (DateTimeField) parsedField;
+                Date date = dateField.getDate();
+                if (date == null) {
+                    date = tryOtherDateFormats(field.getBody());
+                }
+                metadata.set(TikaCoreProperties.CREATED, date);
+            }
+        } catch (RuntimeException me) {
+            if (strictParsing) {
+                throw me;
+            }
+        }
+    }
+
+    private static synchronized Date tryOtherDateFormats(String text) {
+        if (text == null) {
+            return null;
+        }
+        //strip out additional spaces and trim
+        text = text.replaceAll("\\s+", " ").trim();
+
+        //strip out commas
+        text = text.replaceAll(",", "");
+        Matcher matcher = GENERAL_TIME_ZONE_NO_MINUTES_PATTERN.matcher(text);
+        if (matcher.find()) {
+            text = matcher.replaceFirst("GMT$1$2:00");
+        }
+
+        matcher = AM_PM.matcher(text);
+        if (matcher.find()) {
+            text = matcher.replaceFirst("$1 $2");
+        }
+
+        for (DateFormat format : ALTERNATE_DATE_FORMATS) {
+            try {
+                return format.parse(text);
+            } catch (ParseException e) {
+            }
+        }
+        return null;
+    }
+
+    private void processAddressList(ParsedField field, String addressListType,
+                                    String metadataField) throws MimeException {
+        AddressListField toField = (AddressListField) field;
+        if (toField.isValidField()) {
+            AddressList addressList = toField.getAddressList();
+            for (Address address : addressList) {
+                metadata.add(metadataField, getDisplayString(address));
+            }
+        } else {
+            String to = stripOutFieldPrefix(field,
+                    addressListType);
+            for (String eachTo : to.split(",")) {
+                metadata.add(metadataField, eachTo.trim());
+            }
+        }
+    }
+
+    private String getDisplayString(Address address) {
+        if (address instanceof Mailbox) {
+            Mailbox mailbox = (Mailbox) address;
+            String name = mailbox.getName();
+            if (name != null && name.length() > 0) {
+                name = DecoderUtil.decodeEncodedWords(name, DecodeMonitor.SILENT);
+                return name + " <" + mailbox.getAddress() + ">";
+            } else {
+                return mailbox.getAddress();
+            }
+        } else {
+            return address.toString();
+        }
+    }
+
+    public void preamble(InputStream is) throws MimeException, IOException {
+    }
+
+    public void raw(InputStream is) throws MimeException, IOException {
+    }
+
+    public void startBodyPart() throws MimeException {
+        try {
+            handler.startElement("div", "class", "email-entry");
+            handler.startElement("p");
+        } catch (SAXException e) {
+            throw new MimeException(e);
+        }
+    }
+
+    public void startHeader() throws MimeException {
+        // TODO Auto-generated method stub
+
+    }
+
+    public void startMultipart(BodyDescriptor descr) throws MimeException {
+        inPart = true;
+    }
+
+    private String stripOutFieldPrefix(Field field, String fieldname) {
+        String temp = field.getRaw().toString();
+        int loc = fieldname.length();
+        while (temp.charAt(loc) == ' ') {
+            loc++;
+        }
+        return temp.substring(loc);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java b/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java
index 9ac02a7..6299d3f 100644
--- a/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java
+++ b/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java
@@ -1,95 +1,95 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.mail;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Collections;
-import java.util.Set;
-
-import org.apache.james.mime4j.MimeException;
-import org.apache.james.mime4j.parser.MimeStreamParser;
-import org.apache.james.mime4j.stream.MimeConfig;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AbstractParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-/**
- * Uses apache-mime4j to parse emails. Each part is treated with the
- * corresponding parser and displayed within elements.
- * <p/>
- * A {@link MimeEntityConfig} object can be passed in the parsing context
- * to better control the parsing process.
- *
- * @author jnioche@digitalpebble.com
- */
-public class RFC822Parser extends AbstractParser {
-    /**
-     * Serial version UID
-     */
-    private static final long serialVersionUID = -5504243905998074168L;
-
-    private static final Set<MediaType> SUPPORTED_TYPES = Collections
-            .singleton(MediaType.parse("message/rfc822"));
-
-    public Set<MediaType> getSupportedTypes(ParseContext context) {
-        return SUPPORTED_TYPES;
-    }
-
-    public void parse(InputStream stream, ContentHandler handler,
-                      Metadata metadata, ParseContext context) throws IOException,
-            SAXException, TikaException {
-        // Get the mime4j configuration, or use a default one
-        MimeConfig config = new MimeConfig();
-        config.setMaxLineLen(100000);
-        config.setMaxHeaderLen(100000); // max length of any individual header
-        config = context.get(MimeConfig.class, config);
-
-        MimeStreamParser parser = new MimeStreamParser(config);
-        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
-
-        MailContentHandler mch = new MailContentHandler(
-                xhtml, metadata, context, config.isStrictParsing());
-        parser.setContentHandler(mch);
-        parser.setContentDecoding(true);
-        
-        TikaInputStream tstream = TikaInputStream.get(stream);
-        try {
-            parser.parse(tstream);
-        } catch (IOException e) {
-            tstream.throwIfCauseOf(e);
-            throw new TikaException("Failed to parse an email message", e);
-        } catch (MimeException e) {
-            // Unwrap the exception in case it was not thrown by mime4j
-            Throwable cause = e.getCause();
-            if (cause instanceof TikaException) {
-                throw (TikaException) cause;
-            } else if (cause instanceof SAXException) {
-                throw (SAXException) cause;
-            } else {
-                throw new TikaException("Failed to parse an email message", e);
-            }
-        }
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mail;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.james.mime4j.MimeException;
+import org.apache.james.mime4j.parser.MimeStreamParser;
+import org.apache.james.mime4j.stream.MimeConfig;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Uses apache-mime4j to parse emails. Each part is treated with the
+ * corresponding parser and displayed within elements.
+ * <p/>
+ * A {@link MimeEntityConfig} object can be passed in the parsing context
+ * to better control the parsing process.
+ *
+ * @author jnioche@digitalpebble.com
+ */
+public class RFC822Parser extends AbstractParser {
+    /**
+     * Serial version UID
+     */
+    private static final long serialVersionUID = -5504243905998074168L;
+
+    private static final Set<MediaType> SUPPORTED_TYPES = Collections
+            .singleton(MediaType.parse("message/rfc822"));
+
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    public void parse(InputStream stream, ContentHandler handler,
+                      Metadata metadata, ParseContext context) throws IOException,
+            SAXException, TikaException {
+        // Get the mime4j configuration, or use a default one
+        MimeConfig config = new MimeConfig();
+        config.setMaxLineLen(100000);
+        config.setMaxHeaderLen(100000); // max length of any individual header
+        config = context.get(MimeConfig.class, config);
+
+        MimeStreamParser parser = new MimeStreamParser(config);
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+
+        MailContentHandler mch = new MailContentHandler(
+                xhtml, metadata, context, config.isStrictParsing());
+        parser.setContentHandler(mch);
+        parser.setContentDecoding(true);
+        
+        TikaInputStream tstream = TikaInputStream.get(stream);
+        try {
+            parser.parse(tstream);
+        } catch (IOException e) {
+            tstream.throwIfCauseOf(e);
+            throw new TikaException("Failed to parse an email message", e);
+        } catch (MimeException e) {
+            // Unwrap the exception in case it was not thrown by mime4j
+            Throwable cause = e.getCause();
+            if (cause instanceof TikaException) {
+                throw (TikaException) cause;
+            } else if (cause instanceof SAXException) {
+                throw (SAXException) cause;
+            } else {
+                throw new TikaException("Failed to parse an email message", e);
+            }
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-web-module/src/test/java/org/apache/tika/parser/feed/FeedParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-web-module/src/test/java/org/apache/tika/parser/feed/FeedParserTest.java b/tika-parser-modules/tika-parser-web-module/src/test/java/org/apache/tika/parser/feed/FeedParserTest.java
index 5be4b0b..cc10dd2 100644
--- a/tika-parser-modules/tika-parser-web-module/src/test/java/org/apache/tika/parser/feed/FeedParserTest.java
+++ b/tika-parser-modules/tika-parser-web-module/src/test/java/org/apache/tika/parser/feed/FeedParserTest.java
@@ -1,75 +1,75 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.feed;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-
-import java.io.InputStream;
-
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.sax.BodyContentHandler;
-import org.junit.Test;
-import org.xml.sax.ContentHandler;
-
-public class FeedParserTest {
-    @Test
-    public void testRSSParser() throws Exception {
-        try (InputStream input = FeedParserTest.class.getResourceAsStream(
-                "/test-documents/rsstest.rss")) {
-            Metadata metadata = new Metadata();
-            ContentHandler handler = new BodyContentHandler();
-            ParseContext context = new ParseContext();
-
-            new FeedParser().parse(input, handler, metadata, context);
-
-            String content = handler.toString();
-            assertFalse(content == null);
-
-            assertEquals("Sample RSS File for Junit test",
-                    metadata.get(TikaCoreProperties.DESCRIPTION));
-            assertEquals("TestChannel", metadata.get(TikaCoreProperties.TITLE));
-
-            // TODO find a way of testing the paragraphs and anchors
-        }
-    }
-
-
-    @Test
-    public void testAtomParser() throws Exception {
-        try (InputStream input = FeedParserTest.class.getResourceAsStream(
-                "/test-documents/testATOM.atom")) {
-            Metadata metadata = new Metadata();
-            ContentHandler handler = new BodyContentHandler();
-            ParseContext context = new ParseContext();
-
-            new FeedParser().parse(input, handler, metadata, context);
-
-            String content = handler.toString();
-            assertFalse(content == null);
-
-            assertEquals("Sample Atom File for Junit test",
-                    metadata.get(TikaCoreProperties.DESCRIPTION));
-            assertEquals("Test Atom Feed", metadata.get(TikaCoreProperties.TITLE));
-
-            // TODO Check some more
-        }
-    }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.feed;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+
+import java.io.InputStream;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.BodyContentHandler;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+
+public class FeedParserTest {
+    @Test
+    public void testRSSParser() throws Exception {
+        try (InputStream input = FeedParserTest.class.getResourceAsStream(
+                "/test-documents/rsstest.rss")) {
+            Metadata metadata = new Metadata();
+            ContentHandler handler = new BodyContentHandler();
+            ParseContext context = new ParseContext();
+
+            new FeedParser().parse(input, handler, metadata, context);
+
+            String content = handler.toString();
+            assertFalse(content == null);
+
+            assertEquals("Sample RSS File for Junit test",
+                    metadata.get(TikaCoreProperties.DESCRIPTION));
+            assertEquals("TestChannel", metadata.get(TikaCoreProperties.TITLE));
+
+            // TODO find a way of testing the paragraphs and anchors
+        }
+    }
+
+
+    @Test
+    public void testAtomParser() throws Exception {
+        try (InputStream input = FeedParserTest.class.getResourceAsStream(
+                "/test-documents/testATOM.atom")) {
+            Metadata metadata = new Metadata();
+            ContentHandler handler = new BodyContentHandler();
+            ParseContext context = new ParseContext();
+
+            new FeedParser().parse(input, handler, metadata, context);
+
+            String content = handler.toString();
+            assertFalse(content == null);
+
+            assertEquals("Sample Atom File for Junit test",
+                    metadata.get(TikaCoreProperties.DESCRIPTION));
+            assertEquals("Test Atom Feed", metadata.get(TikaCoreProperties.TITLE));
+
+            // TODO Check some more
+        }
+    }
+
+}