You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2011/04/11 13:44:19 UTC

svn commit: r1091042 - in /tika/trunk/tika-parsers: pom.xml src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java

Author: nick
Date: Mon Apr 11 11:44:18 2011
New Revision: 1091042

URL: http://svn.apache.org/viewvc?rev=1091042&view=rev
Log:
TIKA-615 - Outlook parsing update for POI 3.8 beta 2

Modified:
    tika/trunk/tika-parsers/pom.xml
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java

Modified: tika/trunk/tika-parsers/pom.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/pom.xml?rev=1091042&r1=1091041&r2=1091042&view=diff
==============================================================================
--- tika/trunk/tika-parsers/pom.xml (original)
+++ tika/trunk/tika-parsers/pom.xml Mon Apr 11 11:44:18 2011
@@ -35,7 +35,7 @@
   <url>http://tika.apache.org/</url>
 
   <properties>
-    <poi.version>3.8-beta1</poi.version>
+    <poi.version>3.8-beta2</poi.version>
   </properties>
 
   <dependencies>

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java?rev=1091042&r1=1091041&r2=1091042&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java Mon Apr 11 11:44:18 2011
@@ -19,6 +19,7 @@ package org.apache.tika.parser.microsoft
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 
+import org.apache.poi.hmef.attribute.MAPIRtfAttribute;
 import org.apache.poi.hsmf.MAPIMessage;
 import org.apache.poi.hsmf.datatypes.AttachmentChunks;
 import org.apache.poi.hsmf.datatypes.ByteChunk;
@@ -27,12 +28,13 @@ import org.apache.poi.hsmf.datatypes.MAP
 import org.apache.poi.hsmf.datatypes.StringChunk;
 import org.apache.poi.hsmf.datatypes.Types;
 import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.html.HtmlParser;
+import org.apache.tika.parser.rtf.RTFParser;
 import org.apache.tika.parser.txt.CharsetDetector;
 import org.apache.tika.sax.XHTMLContentHandler;
 import org.xml.sax.SAXException;
@@ -43,7 +45,7 @@ import org.xml.sax.SAXException;
 public class OutlookExtractor extends AbstractPOIFSExtractor {
     private final MAPIMessage msg;
 
-    public OutlookExtractor(POIFSFileSystem filesystem, ParseContext context) throws TikaException {
+    public OutlookExtractor(NPOIFSFileSystem filesystem, ParseContext context) throws TikaException {
         super(context);
         
         try {
@@ -179,7 +181,17 @@ public class OutlookExtractor extends Ab
               }
            }
            if(rtfChunk != null && !doneBody) {
-              // TODO Needs POI 3.8 beta 2 for TNEF support
+              ByteChunk chunk = (ByteChunk)rtfChunk;
+              MAPIRtfAttribute rtf = new MAPIRtfAttribute(
+                    MAPIProperty.RTF_COMPRESSED, Types.BINARY, chunk.getValue()
+              );
+              RTFParser rtfParser = new RTFParser();
+              // RTF body parsing is disabled pending a fix for TIKA-632
+//              rtfParser.parse(
+//                    new ByteArrayInputStream(rtf.getData()),
+//                    xhtml, new Metadata(), new ParseContext()
+//              );
+//              doneBody = true;
            }
            if(textChunk != null && !doneBody) {
               xhtml.element("p", ((StringChunk)textChunk).getValue());