You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/09/09 11:24:15 UTC
svn commit: r1167056 - in /tika/trunk/tika-parsers/src/test:
java/org/apache/tika/parser/microsoft/ooxml/OOXMLContainerExtractionTest.java
resources/test-documents/EmbeddedPDF.docx
Author: jukka
Date: Fri Sep 9 09:24:14 2011
New Revision: 1167056
URL: http://svn.apache.org/viewvc?rev=1167056&view=rev
Log:
TIKA-704: PDF and Outlook docs embedded in MS Word documents not parsed
Remove hidden Yamaha manual from the EmbeddedPDF.docx test file.
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLContainerExtractionTest.java
tika/trunk/tika-parsers/src/test/resources/test-documents/EmbeddedPDF.docx
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLContainerExtractionTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLContainerExtractionTest.java?rev=1167056&r1=1167055&r2=1167056&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLContainerExtractionTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLContainerExtractionTest.java Fri Sep 9 09:24:14 2011
@@ -274,20 +274,14 @@ public class OOXMLContainerExtractionTes
TrackingHandler handler =
process("EmbeddedPDF.docx", extractor, false);
- assertEquals(4, handler.filenames.size());
- assertEquals(4, handler.mediaTypes.size());
+ assertEquals(2, handler.filenames.size());
+ assertEquals(2, handler.mediaTypes.size());
assertEquals("image1.emf", handler.filenames.get(0));
assertEquals(TYPE_EMF, handler.mediaTypes.get(0));
assertNull(handler.filenames.get(1));
assertEquals(TYPE_PDF, handler.mediaTypes.get(1));
-
- assertEquals("image2.emf", handler.filenames.get(2));
- assertEquals(TYPE_EMF, handler.mediaTypes.get(2));
-
- assertNull(handler.filenames.get(3));
- assertEquals(TYPE_PDF, handler.mediaTypes.get(3));
}
}
Modified: tika/trunk/tika-parsers/src/test/resources/test-documents/EmbeddedPDF.docx
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/EmbeddedPDF.docx?rev=1167056&r1=1167055&r2=1167056&view=diff
==============================================================================
Files tika/trunk/tika-parsers/src/test/resources/test-documents/EmbeddedPDF.docx (original) and tika/trunk/tika-parsers/src/test/resources/test-documents/EmbeddedPDF.docx Fri Sep 9 09:24:14 2011 differ