You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2011/11/15 10:41:47 UTC
svn commit: r1202109 - in /tika/trunk/tika-parsers/src:
main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
test/java/org/apache/tika/detect/TestContainerAwareDetector.java
test/resources/test-documents/testWORKS2000.wps
Author: nick
Date: Tue Nov 15 09:41:46 2011
New Revision: 1202109
URL: http://svn.apache.org/viewvc?rev=1202109&view=rev
Log:
TIKA-779 Works 2000 container aware detection, plus test
Added:
tika/trunk/tika-parsers/src/test/resources/test-documents/testWORKS2000.wps (with props)
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java?rev=1202109&r1=1202108&r2=1202109&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java Tue Nov 15 09:41:46 2011
@@ -124,10 +124,14 @@ public class POIFSContainerDetector impl
} else if (names.contains("VisioDocument")) {
return VSD;
} else if (names.contains("CONTENTS") && names.contains("SPELLING")) {
+ // Newer Works files
+ return WPS;
+ } else if (names.contains("CONTENTS") && names.contains("\u0001CompObj")) {
+ // Normally an older Works file
return WPS;
} else if (names.contains("CONTENTS")) {
- // CONTENTS without SPELLING normally means some sort of
- // embedded non-office file inside an OLE2 document
+ // CONTENTS without SPELLING or CompObj normally means some sort
+ // of embedded non-office file inside an OLE2 document
// This is most commonly triggered on nested directories
return OLE;
} else if (names.contains("\u0001Ole10Native")) {
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java?rev=1202109&r1=1202108&r2=1202109&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java Tue Nov 15 09:41:46 2011
@@ -56,6 +56,7 @@ public class TestContainerAwareDetector
// Try some ones that POI doesn't handle, that are still OLE2 based
assertDetect("testWORKS.wps", "application/vnd.ms-works");
+ assertDetect("testWORKS2000.wps", "application/vnd.ms-works");
assertDetect("testCOREL.shw", "application/x-corelpresentations");
assertDetect("testQUATTRO.qpw", "application/x-quattro-pro");
assertDetect("testQUATTRO.wb3", "application/x-quattro-pro");
Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testWORKS2000.wps
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testWORKS2000.wps?rev=1202109&view=auto
==============================================================================
Binary file - no diff available.
Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/testWORKS2000.wps
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream