You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2010/09/13 15:10:10 UTC

svn commit: r996517 - in /tika/trunk/tika-parsers: ./ src/main/java/org/apache/tika/parser/microsoft/ src/main/java/org/apache/tika/parser/microsoft/ooxml/ src/test/java/org/apache/tika/parser/microsoft/ src/test/java/org/apache/tika/parser/microsoft/o...

Author: nick
Date: Mon Sep 13 13:10:09 2010
New Revision: 996517

URL: http://svn.apache.org/viewvc?rev=996517&view=rev
Log:
More Office embedded resource extraction support (TIKA-509)
Existing outlook code has been updated to the new style, and tests added
XSLF .pptx support has been added with tests
POI version bumped from 3.7 beta 1 to 3.7 beta 2, as required for better Outlook attachment support

Added:
    tika/trunk/tika-parsers/src/test/resources/test-documents/testMSG_att_doc.msg   (with props)
    tika/trunk/tika-parsers/src/test/resources/test-documents/testMSG_att_msg.msg   (with props)
Modified:
    tika/trunk/tika-parsers/pom.xml
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLContainerExtractionTest.java

Modified: tika/trunk/tika-parsers/pom.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/pom.xml?rev=996517&r1=996516&r2=996517&view=diff
==============================================================================
--- tika/trunk/tika-parsers/pom.xml (original)
+++ tika/trunk/tika-parsers/pom.xml Mon Sep 13 13:10:09 2010
@@ -35,7 +35,7 @@
   <url>http://tika.apache.org/</url>
 
   <properties>
-    <poi.version>3.7-beta1</poi.version>
+    <poi.version>3.7-beta2</poi.version>
   </properties>
 
   <repositories>

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java?rev=996517&r1=996516&r2=996517&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java Mon Sep 13 13:10:09 2010
@@ -16,7 +16,6 @@
  */
 package org.apache.tika.parser.microsoft;
 
-import java.io.ByteArrayInputStream;
 import java.io.IOException;
 
 import org.apache.poi.hsmf.MAPIMessage;
@@ -24,33 +23,30 @@ import org.apache.poi.hsmf.datatypes.Att
 import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.EmptyParser;
 import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.BodyContentHandler;
-import org.apache.tika.sax.EmbeddedContentHandler;
 import org.apache.tika.sax.XHTMLContentHandler;
 import org.xml.sax.SAXException;
 
 /**
  * Outlook Message Parser.
  */
-public class OutlookExtractor {
+public class OutlookExtractor extends AbstractPOIFSExtractor {
     private final MAPIMessage msg;
-    private final ParseContext context;
 
     public OutlookExtractor(POIFSFileSystem filesystem, ParseContext context) throws TikaException {
+        super(context);
+        
         try {
             this.msg = new MAPIMessage(filesystem);
-            this.context = context;
         } catch (IOException e) {
             throw new TikaException("Failed to parse Outlook message", e);
         }
     }
 
     public void parse(XHTMLContentHandler xhtml, Metadata metadata)
-            throws TikaException, SAXException {
+            throws TikaException, SAXException, IOException {
         try {
            msg.setReturnNullOnMissingChunk(true);
           
@@ -116,30 +112,31 @@ public class OutlookExtractor {
            
            for (AttachmentChunks attachment : msg.getAttachmentFiles()) {
                xhtml.startElement("div", "class", "attachment-entry");
-               Metadata entrydata = new Metadata();
                
                String filename = null;
                if (attachment.attachLongFileName != null) {
-        	   filename = attachment.attachLongFileName.getValue();
+                  filename = attachment.attachLongFileName.getValue();
                } else if (attachment.attachFileName != null) {
-        	   filename = attachment.attachFileName.getValue();
+                  filename = attachment.attachFileName.getValue();
                }
                if (filename != null && filename.length() > 0) {
-                   entrydata.set(Metadata.RESOURCE_NAME_KEY, filename);
                    xhtml.element("h1", filename);
                }
                
-               try {
-                   // Use the delegate parser to parse this entry
-                   context.get(Parser.class, EmptyParser.INSTANCE).parse(
-                	   new ByteArrayInputStream(attachment.attachData.getValue()),
-                           new EmbeddedContentHandler(new BodyContentHandler(xhtml)),
-                           entrydata,
-                           context
-                   );
-               } catch (Exception e) {
-                   // Could not parse the entry, just skip the content
+               if(attachment.attachData != null) {
+                  handleEmbeddedResource(
+                        TikaInputStream.get(attachment.attachData.getValue()),
+                        filename,
+                        null, xhtml
+                  );
                }
+               if(attachment.attachmentDirectory != null) {
+                  handleEmbededOfficeDoc(
+                        attachment.attachmentDirectory.getDirectory(),
+                        xhtml
+                  );
+               }
+
                xhtml.endElement("div");
                
            }

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java?rev=996517&r1=996516&r2=996517&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java Mon Sep 13 13:10:09 2010
@@ -20,11 +20,21 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
 import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackagePartName;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.openxml4j.opc.PackagingURIHelper;
+import org.apache.poi.openxml4j.opc.TargetMode;
 import org.apache.poi.xslf.XSLFSlideShow;
 import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
 import org.apache.poi.xslf.usermodel.XMLSlideShow;
+import org.apache.poi.xslf.usermodel.XSLFRelation;
 import org.apache.poi.xslf.usermodel.XSLFSlide;
+import org.apache.poi.xssf.usermodel.XSSFRelation;
+import org.apache.poi.xssf.usermodel.XSSFSheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.apache.tika.exception.TikaException;
 import org.apache.tika.sax.XHTMLContentHandler;
 import org.apache.xmlbeans.XmlException;
 import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
@@ -97,9 +107,42 @@ public class XSLFPowerPointExtractorDeco
         }
     }
     
+    /**
+     * In PowerPoint files, slides have things embedded in them,
+     *  and slide drawings which have the images
+     */
     @Override
-    protected List<PackagePart> getMainDocumentParts() {
-       // TODO
-       return new ArrayList<PackagePart>();
+    protected List<PackagePart> getMainDocumentParts() throws TikaException {
+       List<PackagePart> parts = new ArrayList<PackagePart>();
+       XSLFSlideShow document = (XSLFSlideShow) extractor.getDocument();
+       
+       for (CTSlideIdListEntry ctSlide : document.getSlideReferences().getSldIdList()) {
+          // Add the slide
+          PackagePart slidePart;
+          try {
+             slidePart = document.getSlidePart(ctSlide);
+          } catch(IOException e) {
+             throw new TikaException("Broken OOXML file", e);
+          } catch(XmlException xe) {
+             throw new TikaException("Broken OOXML file", xe);
+          }
+          parts.add(slidePart);
+          
+          // If it has drawings, return those too
+          try {
+             // TODO Improve when we upgrade POI
+//             for(PackageRelationship rel : slidePart.getRelationshipsByType(XSLFRelation.VML_DRAWING.getRelation())) {
+             for(PackageRelationship rel : slidePart.getRelationshipsByType("http://schemas.openxmlformats.org/officeDocument/2006/relationships/vmlDrawing")) {
+                if(rel.getTargetMode() == TargetMode.INTERNAL) {
+                   PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
+                   parts.add( rel.getPackage().getPart(relName) );
+                }
+             }
+          } catch(InvalidFormatException e) {
+             throw new TikaException("Broken OOXML file", e);
+          }
+       }
+
+       return parts;
     }
 }

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java?rev=996517&r1=996516&r2=996517&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java Mon Sep 13 13:10:09 2010
@@ -38,8 +38,13 @@ public abstract class AbstractPOIContain
     public static final MediaType TYPE_DOCX = MediaType.application("vnd.openxmlformats-officedocument.wordprocessingml.document");
     public static final MediaType TYPE_PPTX = MediaType.application("vnd.openxmlformats-officedocument.presentationml.presentation");
     public static final MediaType TYPE_XLSX = MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+    public static final MediaType TYPE_MSG = MediaType.application("vnd.ms-outlook");
+    
+    public static final MediaType TYPE_TXT = MediaType.text("plain");
+    public static final MediaType TYPE_PDF = MediaType.application("pdf");
     
     public static final MediaType TYPE_JPG = MediaType.image("jpeg");
+    public static final MediaType TYPE_GIF = MediaType.image("gif");
     public static final MediaType TYPE_PNG = MediaType.image("png");
     public static final MediaType TYPE_EMF = MediaType.image("x-emf");
     

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java?rev=996517&r1=996516&r2=996517&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java Mon Sep 13 13:10:09 2010
@@ -71,6 +71,7 @@ public class POIContainerExtractionTest 
        // PowerPoint with 2 images + sound
        // TODO
        
+       
        // Word with 1 image
        handler = process("testWORD_1img.doc", extractor, false);
        assertEquals(1, handler.filenames.size());
@@ -172,7 +173,6 @@ public class POIContainerExtractionTest 
        
        // With recursion, should get their images too
        handler = process("testWORD_embeded.doc", extractor, true);
-       // TODO - Not all resources of embedded files are currently extracted 
        assertEquals(12, handler.filenames.size());
        assertEquals(12, handler.mediaTypes.size());
        
@@ -193,15 +193,32 @@ public class POIContainerExtractionTest 
        assertEquals(TYPE_XLS, handler.mediaTypes.get(10)); // Embedded office doc
        assertEquals(TYPE_PNG, handler.mediaTypes.get(11)); //    PNG inside .xls
        
+       
        // PowerPoint with excel and word
        // TODO
        
        
        // Outlook with a text file and a word document
-       // TODO
+       handler = process("testMSG_att_doc.msg", extractor, true);
+       assertEquals(2, handler.filenames.size());
+       assertEquals(2, handler.mediaTypes.size());
+       
+       assertEquals("test-unicode.doc", handler.filenames.get(0));
+       assertEquals(TYPE_DOC, handler.mediaTypes.get(0));
+       
+       assertEquals("pj1.txt", handler.filenames.get(1));
+       assertEquals(TYPE_TXT, handler.mediaTypes.get(1));
        
        
        // Outlook with a pdf and another outlook message
-       // TODO
+       handler = process("testMSG_att_msg.msg", extractor, true);
+       assertEquals(2, handler.filenames.size());
+       assertEquals(2, handler.mediaTypes.size());
+       
+       assertEquals(null, handler.filenames.get(0));
+       assertEquals(TYPE_MSG, handler.mediaTypes.get(0));
+       
+       assertEquals("smbprn.00009008.KdcPjl.pdf", handler.filenames.get(1));
+       assertEquals(TYPE_PDF, handler.mediaTypes.get(1));
     }
 }

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLContainerExtractionTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLContainerExtractionTest.java?rev=996517&r1=996516&r2=996517&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLContainerExtractionTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLContainerExtractionTest.java Mon Sep 13 13:10:09 2010
@@ -79,7 +79,18 @@ public class OOXMLContainerExtractionTes
 
        
        // PowerPoint with 2 images + sound
-       // TODO
+       // TODO Figure out why we can't find the sound anywhere...
+       handler = process("testPPT_2imgs.pptx", extractor, false);
+       assertEquals(3, handler.filenames.size());
+       assertEquals(3, handler.mediaTypes.size());
+       
+       assertEquals(null, handler.filenames.get(0));
+       assertEquals(null, handler.filenames.get(1));
+       assertEquals(null, handler.filenames.get(2));
+       assertEquals(TYPE_PNG, handler.mediaTypes.get(0));
+       assertEquals(TYPE_GIF, handler.mediaTypes.get(1)); // icon of sound
+       assertEquals(TYPE_PNG, handler.mediaTypes.get(2));
+       
        
        // Word with 1 image
        handler = process("testWORD_1img.docx", extractor, false);
@@ -141,21 +152,35 @@ public class OOXMLContainerExtractionTes
        
        // With recursion, should get the images embedded in the office files too
        handler = process("testEXCEL_embeded.xlsx", extractor, true);
-       assertEquals(9, handler.filenames.size());
-       assertEquals(9, handler.mediaTypes.size());
+       assertEquals(23, handler.filenames.size());
+       assertEquals(23, handler.mediaTypes.size());
        
        for(String filename : handler.filenames)
           assertEquals(null, filename);
        
        assertEquals(TYPE_PPTX, handler.mediaTypes.get(0)); // Embedded office doc
-       assertEquals(TYPE_DOC, handler.mediaTypes.get(1));  // Embedded office doc
-       assertEquals(TYPE_PNG, handler.mediaTypes.get(2));  //   PNG inside .doc
-       assertEquals(TYPE_DOCX, handler.mediaTypes.get(3)); // Embedded office doc
-       assertEquals(TYPE_PNG, handler.mediaTypes.get(4));  //   PNG inside .docx
-       assertEquals(TYPE_PNG, handler.mediaTypes.get(5)); // Embedded image
-       assertEquals(TYPE_EMF, handler.mediaTypes.get(6)); // Icon of embedded office doc
-       assertEquals(TYPE_EMF, handler.mediaTypes.get(7)); // Icon of embedded office doc
-       assertEquals(TYPE_EMF, handler.mediaTypes.get(8)); // Icon of embedded office doc
+        assertEquals(TYPE_PNG, handler.mediaTypes.get(1));  //   PNG inside .pptx
+        assertEquals(TYPE_GIF, handler.mediaTypes.get(2));  //   GIF inside .pptx
+        assertEquals(TYPE_PNG, handler.mediaTypes.get(3));  //   PNG inside .pptx
+        assertEquals(TYPE_XLSX, handler.mediaTypes.get(4)); //   .xlsx inside .pptx
+         assertEquals(TYPE_PNG, handler.mediaTypes.get(5)); //     PNG inside .xlsx inside .pptx
+        assertEquals(TYPE_DOCX, handler.mediaTypes.get(6)); //   .docx inside .pptx
+         assertEquals(TYPE_PNG, handler.mediaTypes.get(7)); //     PNG inside .docx inside .pptx
+         assertEquals(TYPE_JPG, handler.mediaTypes.get(8)); //     JPG inside .docx inside .pptx
+         assertEquals(TYPE_PNG, handler.mediaTypes.get(9)); //     PNG inside .docx inside .pptx
+        assertEquals(TYPE_DOC, handler.mediaTypes.get(10)); //   .doc inside .pptx
+         assertEquals(TYPE_PNG, handler.mediaTypes.get(11)); //    PNG inside .doc inside .pptx
+        assertEquals(TYPE_EMF, handler.mediaTypes.get(12)); //   Icon of item inside .pptx
+        assertEquals(TYPE_EMF, handler.mediaTypes.get(13)); //   Icon of item inside .pptx
+        assertEquals(TYPE_EMF, handler.mediaTypes.get(14)); //   Icon of item inside .pptx
+       assertEquals(TYPE_DOC, handler.mediaTypes.get(15));  // Embedded office doc
+       assertEquals(TYPE_PNG, handler.mediaTypes.get(16));  //   PNG inside .doc
+       assertEquals(TYPE_DOCX, handler.mediaTypes.get(17)); // Embedded office doc
+       assertEquals(TYPE_PNG, handler.mediaTypes.get(18));  //   PNG inside .docx
+       assertEquals(TYPE_PNG, handler.mediaTypes.get(19)); // Embedded image
+       assertEquals(TYPE_EMF, handler.mediaTypes.get(20)); // Icon of embedded office doc
+       assertEquals(TYPE_EMF, handler.mediaTypes.get(21)); // Icon of embedded office doc
+       assertEquals(TYPE_EMF, handler.mediaTypes.get(22)); // Icon of embedded office doc
        
        
        // Word with .docx, powerpoint and excel
@@ -180,26 +205,46 @@ public class OOXMLContainerExtractionTes
        
        // With recursion, should get their images too
        handler = process("testWORD_embeded.docx", extractor, true);
-       assertEquals(11, handler.filenames.size());
-       assertEquals(11, handler.mediaTypes.size());
+       assertEquals(14, handler.filenames.size());
+       assertEquals(14, handler.mediaTypes.size());
        
        // We don't know their filenames
        for(String filename : handler.filenames)
           assertEquals(null, filename);
        // But we do know their types
        assertEquals(TYPE_PPTX, handler.mediaTypes.get(0)); // Embedded office doc
-       assertEquals(TYPE_EMF, handler.mediaTypes.get(1));  // Icon of embedded office doc
-       assertEquals(TYPE_DOC, handler.mediaTypes.get(2));  // Embedded office doc
-       assertEquals(TYPE_PNG, handler.mediaTypes.get(3));  //   PNG inside .doc
-       assertEquals(TYPE_PNG, handler.mediaTypes.get(4));  // Embedded image
-       assertEquals(TYPE_JPG, handler.mediaTypes.get(5));  // Embedded image
-       assertEquals(TYPE_PNG, handler.mediaTypes.get(6));  // Embedded image
-       assertEquals(TYPE_EMF, handler.mediaTypes.get(7));  // Icon of embedded office doc 
-       assertEquals(TYPE_XLSX, handler.mediaTypes.get(8)); // Embeded office doc
-       assertEquals(TYPE_PNG, handler.mediaTypes.get(9));  //   PNG inside .xlsx
-       assertEquals(TYPE_EMF, handler.mediaTypes.get(10)); // Icon of embedded office doc
+        assertEquals(TYPE_PNG, handler.mediaTypes.get(1));  //   PNG inside .pptx
+        assertEquals(TYPE_GIF, handler.mediaTypes.get(2));  //   GIF inside .pptx
+        assertEquals(TYPE_PNG, handler.mediaTypes.get(3));  //   PNG inside .pptx
+       assertEquals(TYPE_EMF, handler.mediaTypes.get(4));  // Icon of embedded office doc
+       assertEquals(TYPE_DOC, handler.mediaTypes.get(5));  // Embedded office doc
+       assertEquals(TYPE_PNG, handler.mediaTypes.get(6));  //   PNG inside .doc
+       assertEquals(TYPE_PNG, handler.mediaTypes.get(7));  // Embedded image
+       assertEquals(TYPE_JPG, handler.mediaTypes.get(8));  // Embedded image
+       assertEquals(TYPE_PNG, handler.mediaTypes.get(9));  // Embedded image
+       assertEquals(TYPE_EMF, handler.mediaTypes.get(10));  // Icon of embedded office doc 
+       assertEquals(TYPE_XLSX, handler.mediaTypes.get(11)); // Embedded office doc
+       assertEquals(TYPE_PNG, handler.mediaTypes.get(12));  //   PNG inside .xlsx
+       assertEquals(TYPE_EMF, handler.mediaTypes.get(13)); // Icon of embedded office doc
+       
        
        // PowerPoint with excel and word
-       // TODO
+       handler = process("testPPT_embeded.pptx", extractor, false);
+       assertEquals(9, handler.filenames.size());
+       assertEquals(9, handler.mediaTypes.size());
+       
+       // We don't know their filenames
+       for(String filename : handler.filenames)
+          assertEquals(null, filename);
+       // But we do know their types
+       assertEquals(TYPE_PNG, handler.mediaTypes.get(0));  // Embedded image
+       assertEquals(TYPE_GIF, handler.mediaTypes.get(1));  // Embedded image
+       assertEquals(TYPE_PNG, handler.mediaTypes.get(2));  // Embedded image
+       assertEquals(TYPE_XLSX, handler.mediaTypes.get(3)); // Embedded office doc
+       assertEquals(TYPE_DOCX, handler.mediaTypes.get(4)); // Embedded office doc
+       assertEquals(TYPE_DOC, handler.mediaTypes.get(5));  // Embedded office doc
+       assertEquals(TYPE_EMF, handler.mediaTypes.get(6));  // Icon of embedded office doc
+       assertEquals(TYPE_EMF, handler.mediaTypes.get(7));  // Icon of embedded office doc
+       assertEquals(TYPE_EMF, handler.mediaTypes.get(8));  // Icon of embedded office doc
     }
 }

Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testMSG_att_doc.msg
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testMSG_att_doc.msg?rev=996517&view=auto
==============================================================================
Binary file - no diff available.

Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/testMSG_att_doc.msg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testMSG_att_msg.msg
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testMSG_att_msg.msg?rev=996517&view=auto
==============================================================================
Binary file - no diff available.

Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/testMSG_att_msg.msg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream