You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2017/09/15 18:37:24 UTC

[tika] branch master updated: TIKA-2465 -- add epub

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/master by this push:
     new 2e8d45a  TIKA-2465 -- add epub
2e8d45a is described below

commit 2e8d45a54852d2fffcea779824b16357b1375d23
Author: tballison <ta...@mitre.org>
AuthorDate: Fri Sep 15 14:37:15 2017 -0400

    TIKA-2465 -- add epub
---
 .../src/test/java/org/apache/tika/TestXXEInXML.java       | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java b/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java
index 48f3733..28fe85c 100644
--- a/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java
+++ b/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java
@@ -40,7 +40,7 @@ import java.util.zip.ZipOutputStream;
 import static org.junit.Assert.fail;
 
 /**
- * This tests for XXE in basically xml type files, straight xml and ooxml.
+ * This tests for XXE in XML-based files: plain XML and zipped XML formats, e.g. ebook (EPUB) and OOXML.
  * It does not test for XXE prevention in files that may contain xml
  * files, such as PDFs and other XMP-containing files.
  */
@@ -93,7 +93,7 @@ public class TestXXEInXML extends TikaTest {
     }
 
     @Test
-    public void testOOXML() throws Exception {
+    public void testXMLInZips() throws Exception {
         for (String fileName : new String[]{
                 "testWORD.docx",
                 "testWORD_1img.docx",
@@ -107,7 +107,8 @@ public class TestXXEInXML extends TikaTest {
                 "testPPT_2imgs.pptx",
                 "testPPT_comment.pptx",
                 "testPPT_EmbeddedPDF.pptx",
-                "testPPT_macros.pptm"
+                "testPPT_macros.pptm",
+                "testEPUB.epub"
         }) {
             _testOOXML(fileName);
         }
@@ -116,7 +117,7 @@ public class TestXXEInXML extends TikaTest {
     private void _testOOXML(String fileName) throws Exception {
 
         Path originalOOXML = getResourceAsFile("/test-documents/"+fileName).toPath();
-        Path injected = injectOOXML(originalOOXML, false);
+        Path injected = injectZippedXMLs(originalOOXML, false);
 
         Parser p = new AutoDetectParser();
         ContentHandler xhtml = new ToHTMLContentHandler();
@@ -141,7 +142,7 @@ public class TestXXEInXML extends TikaTest {
             parseContext.set(OfficeParserConfig.class, officeParserConfig);
             officeParserConfig.setUseSAXDocxExtractor(true);
             officeParserConfig.setUseSAXPptxExtractor(true);
-            injected = injectOOXML(originalOOXML, true);
+            injected = injectZippedXMLs(originalOOXML, true);
 
             p.parse(Files.newInputStream(injected), xhtml, metadata, parseContext);
         } catch (FileNotFoundException e) {
@@ -157,7 +158,7 @@ public class TestXXEInXML extends TikaTest {
     //handlePart
     public void testDocxWithIncorrectSAXConfiguration() throws Exception {
         Path originalDocx = getResourceAsFile("/test-documents/testWORD_macros.docm").toPath();
-        Path injected = injectOOXML(originalDocx, true);
+        Path injected = injectZippedXMLs(originalDocx, true);
         Parser p = new AutoDetectParser();
         ContentHandler xhtml = new ToHTMLContentHandler();
         ParseContext parseContext = new ParseContext();
@@ -173,7 +174,7 @@ public class TestXXEInXML extends TikaTest {
         }
     }
 
-    private Path injectOOXML(Path original, boolean includeSlides) throws IOException {
+    private Path injectZippedXMLs(Path original, boolean includeSlides) throws IOException {
         ZipFile input = new ZipFile(original.toFile());
         File output = Files.createTempFile("tika-xxe-", ".zip").toFile();
         ZipOutputStream outZip = new ZipOutputStream(new FileOutputStream(output));

-- 
To stop receiving notification emails like this one, please contact
['"commits@tika.apache.org" <co...@tika.apache.org>'].