You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2014/03/07 02:57:26 UTC
svn commit: r1575120 -
/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
Author: tallison
Date: Fri Mar 7 01:57:26 2014
New Revision: 1575120
URL: http://svn.apache.org/r1575120
Log:
Clean up whitespace in OutlookPSTParser
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java?rev=1575120&r1=1575119&r2=1575120&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java Fri Mar 7 01:57:26 2014
@@ -51,50 +51,50 @@ import com.pff.PSTMessage;
public class OutlookPSTParser extends AbstractParser {
- private static final long serialVersionUID = 620998217748364063L;
+ private static final long serialVersionUID = 620998217748364063L;
- private static final MediaType MS_OUTLOOK_PST_MIMETYPE = MediaType.application("vnd.ms-outlook-pst");
- private static final Set<MediaType> SUPPORTED_TYPES = singleton(MS_OUTLOOK_PST_MIMETYPE);
+ private static final MediaType MS_OUTLOOK_PST_MIMETYPE = MediaType.application("vnd.ms-outlook-pst");
+ private static final Set<MediaType> SUPPORTED_TYPES = singleton(MS_OUTLOOK_PST_MIMETYPE);
- public Set<MediaType> getSupportedTypes(ParseContext context) {
- return SUPPORTED_TYPES;
- }
-
- public void parse(InputStream stream, ContentHandler handler,
- Metadata metadata, ParseContext context) throws IOException,
- SAXException, TikaException {
-
- // Use the delegate parser to parse the contained document
- EmbeddedDocumentExtractor embeddedExtractor = context.get(
- EmbeddedDocumentExtractor.class,
- new ParsingEmbeddedDocumentExtractor(context));
+ public Set<MediaType> getSupportedTypes(ParseContext context) {
+ return SUPPORTED_TYPES;
+ }
+
+ public void parse(InputStream stream, ContentHandler handler,
+ Metadata metadata, ParseContext context) throws IOException,
+ SAXException, TikaException {
+
+ // Use the delegate parser to parse the contained document
+ EmbeddedDocumentExtractor embeddedExtractor = context.get(
+ EmbeddedDocumentExtractor.class,
+ new ParsingEmbeddedDocumentExtractor(context));
metadata.set(Metadata.CONTENT_TYPE, MS_OUTLOOK_PST_MIMETYPE.toString());
- XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+ XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
xhtml.startDocument();
TikaInputStream in = TikaInputStream.get(stream);
try {
- PSTFile pstFile = new PSTFile(in.getFile().getPath());
- metadata.set(Metadata.CONTENT_LENGTH, valueOf(pstFile.getFileHandle().length()));
- boolean isValid = pstFile.getFileHandle().getFD().valid();
+ PSTFile pstFile = new PSTFile(in.getFile().getPath());
+ metadata.set(Metadata.CONTENT_LENGTH, valueOf(pstFile.getFileHandle().length()));
+ boolean isValid = pstFile.getFileHandle().getFD().valid();
metadata.set("isValid", valueOf(isValid));
if (isValid) {
- parseFolder(xhtml, pstFile.getRootFolder(), embeddedExtractor);
+ parseFolder(xhtml, pstFile.getRootFolder(), embeddedExtractor);
}
- } catch (Exception e) {
- throw new TikaException(e.getMessage());
- }
+ } catch (Exception e) {
+ throw new TikaException(e.getMessage());
+ }
xhtml.endDocument();
- }
+ }
- private static void parseFolder(XHTMLContentHandler xhtml, PSTFolder pstFolder, EmbeddedDocumentExtractor embeddedExtractor) throws Exception {
- if (pstFolder.getContentCount() > 0) {
- PSTMessage pstMail = (PSTMessage) pstFolder.getNextChild();
- while (pstMail != null) {
- xhtml.startElement("div", createAttribute("class", "email-entry"));
+ private static void parseFolder(XHTMLContentHandler xhtml, PSTFolder pstFolder, EmbeddedDocumentExtractor embeddedExtractor) throws Exception {
+ if (pstFolder.getContentCount() > 0) {
+ PSTMessage pstMail = (PSTMessage) pstFolder.getNextChild();
+ while (pstMail != null) {
+ xhtml.startElement("div", createAttribute("class", "email-entry"));
xhtml.element("h1", pstMail.getInternetMessageId());
createMetadata(xhtml, "subject", pstMail.getSubject());
createMetadata(xhtml, "internetMessageId", valueOf(pstMail.getInternetMessageId()));
@@ -105,74 +105,74 @@ public class OutlookPSTParser extends Ab
createMetadata(xhtml, "recipients", pstMail.getRecipientsString());
xhtml.element("p", pstMail.getBody());
- parseAttachments(xhtml, pstMail, embeddedExtractor);
+ parseAttachments(xhtml, pstMail, embeddedExtractor);
- xhtml.endElement("div");
- pstMail = (PSTMessage) pstFolder.getNextChild();
- }
- }
-
- if (pstFolder.hasSubfolders()) {
- for (PSTFolder pstSubFolder : pstFolder.getSubFolders()) {
- xhtml.startElement("div", createAttribute("class", "email-folder"));
- xhtml.element("h1", pstSubFolder.getDisplayName());
- parseFolder(xhtml, pstSubFolder, embeddedExtractor);
- xhtml.endElement("div");
- }
- }
- }
-
- private static void createMetadata(XHTMLContentHandler xhtml, String metaName, String metaValue) throws SAXException {
- xhtml.startElement("meta", createAttribute(metaName, metaValue));
- xhtml.endElement("meta");
- }
-
- private static AttributesImpl createAttribute(String attName, String attValue) {
- AttributesImpl attributes = new AttributesImpl();
- attributes.addAttribute("", attName, attName, "CDATA", attValue);
- return attributes;
- }
-
- private static void parseAttachments(XHTMLContentHandler xhtml, PSTMessage email, EmbeddedDocumentExtractor embeddedExtractor) throws TikaException {
- int numberOfAttachments = email.getNumberOfAttachments();
- for (int i = 0; i < numberOfAttachments; i++) {
- File tempFile = null;
- try {
- PSTAttachment attach = email.getAttachment(i);
-
- // Get the filename; both long and short filenames can be used for attachments
- String filename = attach.getLongFilename();
- if (filename.isEmpty()) {
- filename = attach.getFilename();
- }
-
- xhtml.element("p", filename);
-
- Metadata attachMeta = new Metadata();
- attachMeta.set(Metadata.RESOURCE_NAME_KEY, filename);
- attachMeta.set(Metadata.EMBEDDED_RELATIONSHIP_ID, filename);
- AttributesImpl attributes = new AttributesImpl();
+ xhtml.endElement("div");
+ pstMail = (PSTMessage) pstFolder.getNextChild();
+ }
+ }
+
+ if (pstFolder.hasSubfolders()) {
+ for (PSTFolder pstSubFolder : pstFolder.getSubFolders()) {
+ xhtml.startElement("div", createAttribute("class", "email-folder"));
+ xhtml.element("h1", pstSubFolder.getDisplayName());
+ parseFolder(xhtml, pstSubFolder, embeddedExtractor);
+ xhtml.endElement("div");
+ }
+ }
+ }
+
+ private static void createMetadata(XHTMLContentHandler xhtml, String metaName, String metaValue) throws SAXException {
+ xhtml.startElement("meta", createAttribute(metaName, metaValue));
+ xhtml.endElement("meta");
+ }
+
+ private static AttributesImpl createAttribute(String attName, String attValue) {
+ AttributesImpl attributes = new AttributesImpl();
+ attributes.addAttribute("", attName, attName, "CDATA", attValue);
+ return attributes;
+ }
+
+ private static void parseAttachments(XHTMLContentHandler xhtml, PSTMessage email, EmbeddedDocumentExtractor embeddedExtractor) throws TikaException {
+ int numberOfAttachments = email.getNumberOfAttachments();
+ for (int i = 0; i < numberOfAttachments; i++) {
+ File tempFile = null;
+ try {
+ PSTAttachment attach = email.getAttachment(i);
+
+ // Get the filename; both long and short filenames can be used for attachments
+ String filename = attach.getLongFilename();
+ if (filename.isEmpty()) {
+ filename = attach.getFilename();
+ }
+
+ xhtml.element("p", filename);
+
+ Metadata attachMeta = new Metadata();
+ attachMeta.set(Metadata.RESOURCE_NAME_KEY, filename);
+ attachMeta.set(Metadata.EMBEDDED_RELATIONSHIP_ID, filename);
+ AttributesImpl attributes = new AttributesImpl();
attributes.addAttribute("", "class", "class", "CDATA", "embedded");
attributes.addAttribute("", "id", "id", "CDATA", filename);
xhtml.startElement("div", attributes);
- if (embeddedExtractor.shouldParseEmbedded(attachMeta)) {
- TemporaryResources tmp = new TemporaryResources();
- try {
- TikaInputStream tis = TikaInputStream.get(attach.getFileInputStream(), tmp);
- embeddedExtractor.parseEmbedded(tis, xhtml, attachMeta, true);
- } finally {
- tmp.dispose();
- }
- }
- xhtml.endElement("div");
-
- } catch (Exception e) {
- throw new TikaException("Unable to unpack document stream", e);
- } finally {
- if (tempFile != null)
- tempFile.delete();
- }
- }
- }
+ if (embeddedExtractor.shouldParseEmbedded(attachMeta)) {
+ TemporaryResources tmp = new TemporaryResources();
+ try {
+ TikaInputStream tis = TikaInputStream.get(attach.getFileInputStream(), tmp);
+ embeddedExtractor.parseEmbedded(tis, xhtml, attachMeta, true);
+ } finally {
+ tmp.dispose();
+ }
+ }
+ xhtml.endElement("div");
+
+ } catch (Exception e) {
+ throw new TikaException("Unable to unpack document stream", e);
+ } finally {
+ if (tempFile != null)
+ tempFile.delete();
+ }
+ }
+ }
-}
\ No newline at end of file
+}