You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2017/09/15 18:37:24 UTC
[tika] branch master updated: TIKA-2465 -- add epub
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/master by this push:
new 2e8d45a TIKA-2465 -- add epub
2e8d45a is described below
commit 2e8d45a54852d2fffcea779824b16357b1375d23
Author: tballison <ta...@mitre.org>
AuthorDate: Fri Sep 15 14:37:15 2017 -0400
TIKA-2465 -- add epub
---
.../src/test/java/org/apache/tika/TestXXEInXML.java | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java b/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java
index 48f3733..28fe85c 100644
--- a/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java
+++ b/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java
@@ -40,7 +40,7 @@ import java.util.zip.ZipOutputStream;
import static org.junit.Assert.fail;
/**
- * This tests for XXE in basically xml type files, straight xml and ooxml.
+ * This tests for XXE in basically xml type files, straight xml and zipped xmls, e.g. ebook and ooxml.
* It does not test for XXE prevention in files that may contain xml
* files, such as PDFs and other XMP-containing files.
*/
@@ -93,7 +93,7 @@ public class TestXXEInXML extends TikaTest {
}
@Test
- public void testOOXML() throws Exception {
+ public void testXMLInZips() throws Exception {
for (String fileName : new String[]{
"testWORD.docx",
"testWORD_1img.docx",
@@ -107,7 +107,8 @@ public class TestXXEInXML extends TikaTest {
"testPPT_2imgs.pptx",
"testPPT_comment.pptx",
"testPPT_EmbeddedPDF.pptx",
- "testPPT_macros.pptm"
+ "testPPT_macros.pptm",
+ "testEPUB.epub"
}) {
_testOOXML(fileName);
}
@@ -116,7 +117,7 @@ public class TestXXEInXML extends TikaTest {
private void _testOOXML(String fileName) throws Exception {
Path originalOOXML = getResourceAsFile("/test-documents/"+fileName).toPath();
- Path injected = injectOOXML(originalOOXML, false);
+ Path injected = injectZippedXMLs(originalOOXML, false);
Parser p = new AutoDetectParser();
ContentHandler xhtml = new ToHTMLContentHandler();
@@ -141,7 +142,7 @@ public class TestXXEInXML extends TikaTest {
parseContext.set(OfficeParserConfig.class, officeParserConfig);
officeParserConfig.setUseSAXDocxExtractor(true);
officeParserConfig.setUseSAXPptxExtractor(true);
- injected = injectOOXML(originalOOXML, true);
+ injected = injectZippedXMLs(originalOOXML, true);
p.parse(Files.newInputStream(injected), xhtml, metadata, parseContext);
} catch (FileNotFoundException e) {
@@ -157,7 +158,7 @@ public class TestXXEInXML extends TikaTest {
//handlePart
public void testDocxWithIncorrectSAXConfiguration() throws Exception {
Path originalDocx = getResourceAsFile("/test-documents/testWORD_macros.docm").toPath();
- Path injected = injectOOXML(originalDocx, true);
+ Path injected = injectZippedXMLs(originalDocx, true);
Parser p = new AutoDetectParser();
ContentHandler xhtml = new ToHTMLContentHandler();
ParseContext parseContext = new ParseContext();
@@ -173,7 +174,7 @@ public class TestXXEInXML extends TikaTest {
}
}
- private Path injectOOXML(Path original, boolean includeSlides) throws IOException {
+ private Path injectZippedXMLs(Path original, boolean includeSlides) throws IOException {
ZipFile input = new ZipFile(original.toFile());
File output = Files.createTempFile("tika-xxe-", ".zip").toFile();
ZipOutputStream outZip = new ZipOutputStream(new FileOutputStream(output));
--
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.