You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2011/04/11 13:44:19 UTC
svn commit: r1091042 - in /tika/trunk/tika-parsers: pom.xml
src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
Author: nick
Date: Mon Apr 11 11:44:18 2011
New Revision: 1091042
URL: http://svn.apache.org/viewvc?rev=1091042&view=rev
Log:
TIKA-615 - Outlook parsing update for POI 3.8 beta 2
Modified:
tika/trunk/tika-parsers/pom.xml
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
Modified: tika/trunk/tika-parsers/pom.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/pom.xml?rev=1091042&r1=1091041&r2=1091042&view=diff
==============================================================================
--- tika/trunk/tika-parsers/pom.xml (original)
+++ tika/trunk/tika-parsers/pom.xml Mon Apr 11 11:44:18 2011
@@ -35,7 +35,7 @@
<url>http://tika.apache.org/</url>
<properties>
- <poi.version>3.8-beta1</poi.version>
+ <poi.version>3.8-beta2</poi.version>
</properties>
<dependencies>
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java?rev=1091042&r1=1091041&r2=1091042&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java Mon Apr 11 11:44:18 2011
@@ -19,6 +19,7 @@ package org.apache.tika.parser.microsoft
import java.io.ByteArrayInputStream;
import java.io.IOException;
+import org.apache.poi.hmef.attribute.MAPIRtfAttribute;
import org.apache.poi.hsmf.MAPIMessage;
import org.apache.poi.hsmf.datatypes.AttachmentChunks;
import org.apache.poi.hsmf.datatypes.ByteChunk;
@@ -27,12 +28,13 @@ import org.apache.poi.hsmf.datatypes.MAP
import org.apache.poi.hsmf.datatypes.StringChunk;
import org.apache.poi.hsmf.datatypes.Types;
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.html.HtmlParser;
+import org.apache.tika.parser.rtf.RTFParser;
import org.apache.tika.parser.txt.CharsetDetector;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.SAXException;
@@ -43,7 +45,7 @@ import org.xml.sax.SAXException;
public class OutlookExtractor extends AbstractPOIFSExtractor {
private final MAPIMessage msg;
- public OutlookExtractor(POIFSFileSystem filesystem, ParseContext context) throws TikaException {
+ public OutlookExtractor(NPOIFSFileSystem filesystem, ParseContext context) throws TikaException {
super(context);
try {
@@ -179,7 +181,17 @@ public class OutlookExtractor extends Ab
}
}
if(rtfChunk != null && !doneBody) {
- // TODO Needs POI 3.8 beta 2 for TNEF support
+ ByteChunk chunk = (ByteChunk)rtfChunk;
+ MAPIRtfAttribute rtf = new MAPIRtfAttribute(
+ MAPIProperty.RTF_COMPRESSED, Types.BINARY, chunk.getValue()
+ );
+ RTFParser rtfParser = new RTFParser();
+ // Disabled pending a fix to TIKA-632
+// rtfParser.parse(
+// new ByteArrayInputStream(rtf.getData()),
+// xhtml, new Metadata(), new ParseContext()
+// );
+// doneBody = true;
}
if(textChunk != null && !doneBody) {
xhtml.element("p", ((StringChunk)textChunk).getValue());