You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2011/11/21 11:41:58 UTC
svn commit: r1204441 -
/tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
Author: nick
Date: Mon Nov 21 10:41:57 2011
New Revision: 1204441
URL: http://svn.apache.org/viewvc?rev=1204441&view=rev
Log:
A few more TIKA-786-related tests
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java?rev=1204441&r1=1204440&r2=1204441&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java Mon Nov 21 10:41:57 2011
@@ -81,6 +81,10 @@ public class TestContainerAwareDetector
// assertTypeByNameAndData("testEXCEL.xls", "notWord.doc", "application/vnd.ms-excel");
// assertTypeByNameAndData("testWORD.doc", "notExcel.xls", "application/msword");
// assertTypeByNameAndData("testPPT.ppt", "notWord.doc", "application/vnd.ms-powerpoint");
+
+ // With a filename of a totally different type, data will trump filename
+ assertTypeByNameAndData("testEXCEL.xls", "notPDF.pdf", "application/vnd.ms-excel");
+ assertTypeByNameAndData("testEXCEL.xls", "notPNG.png", "application/vnd.ms-excel");
}
public void testOpenContainer() throws Exception {
@@ -201,21 +205,50 @@ public class TestContainerAwareDetector
public void testTruncatedFiles() throws Exception {
// First up a truncated OOXML (zip) file
+
+ // With only the data supplied, the best we can do is the container
TikaInputStream xlsx = getTruncatedFile("testEXCEL.xlsx", 300);
+ Metadata m = new Metadata();
try {
assertEquals(
MediaType.application("x-tika-ooxml"),
- detector.detect(xlsx, new Metadata()));
+ detector.detect(xlsx, m));
+ } finally {
+ xlsx.close();
+ }
+
+ // With truncated data + filename, we can use the filename to specialise
+ xlsx = getTruncatedFile("testEXCEL.xlsx", 300);
+ m = new Metadata();
+ m.add(Metadata.RESOURCE_NAME_KEY, "testEXCEL.xlsx");
+ try {
+ assertEquals(
+ MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
+ detector.detect(xlsx, m));
} finally {
xlsx.close();
}
+
// Now a truncated OLE2 file
TikaInputStream xls = getTruncatedFile("testEXCEL.xls", 400);
+ m = new Metadata();
try {
assertEquals(
MediaType.application("x-tika-msoffice"),
- detector.detect(xls, new Metadata()));
+ detector.detect(xls, m));
+ } finally {
+ xls.close();
+ }
+
+ // Finally a truncated OLE2 file, with a filename available
+ xls = getTruncatedFile("testEXCEL.xls", 400);
+ m = new Metadata();
+ m.add(Metadata.RESOURCE_NAME_KEY, "testEXCEL.xls");
+ try {
+ assertEquals(
+ MediaType.application("vnd.ms-excel"),
+ detector.detect(xls, m));
} finally {
xls.close();
}