You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2017/10/11 12:44:02 UTC
[tika] branch master updated: add tests for xml vulnerabilities.
More work remains on entity expansion...
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/master by this push:
new f444fd7 add tests for xml vulnerabilities. More work remains on entity expansion...
f444fd7 is described below
commit f444fd784b99b181cd7bd54cdec9fbd132b4ef93
Author: tballison <ta...@mitre.org>
AuthorDate: Wed Oct 11 08:43:52 2017 -0400
add tests for xml vulnerabilities. More work remains on entity expansion...
---
.../org/apache/tika/TestXMLEntityExpansion.java | 84 +++++++++++++
.../test/java/org/apache/tika/TestXXEInXML.java | 118 +++---------------
.../src/test/java/org/apache/tika/XMLTestBase.java | 137 +++++++++++++++++++++
3 files changed, 235 insertions(+), 104 deletions(-)
diff --git a/tika-parsers/src/test/java/org/apache/tika/TestXMLEntityExpansion.java b/tika-parsers/src/test/java/org/apache/tika/TestXMLEntityExpansion.java
new file mode 100644
index 0000000..056f056
--- /dev/null
+++ b/tika-parsers/src/test/java/org/apache/tika/TestXMLEntityExpansion.java
@@ -0,0 +1,84 @@
+package org.apache.tika;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.Parser;
+import org.junit.Ignore;
+import org.junit.Test;
+import org.xml.sax.SAXParseException;
+
+import java.io.ByteArrayInputStream;
+import java.nio.charset.StandardCharsets;
+
+import static org.apache.tika.XMLTestBase.injectXML;
+import static org.apache.tika.XMLTestBase.parse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+/**
+ * Draft tests intended to confirm that XML parsing is defended against
+ * entity expansion attacks.
+ */
+@Ignore("initial draft, needs more work")
+public class TestXMLEntityExpansion
+{
+    //DOCTYPE payload: entities b through s each reference the previous entity ten times
+    private static final byte[] ENTITY_EXPANSION_BOMB = (
+            "<!DOCTYPE kaboom [ " +
+            "<!ENTITY a \"1234567890\" > " +
+            "<!ENTITY b \"&a;&a;&a;&a;&a;&a;&a;&a;&a;&a;\" >" +
+            "<!ENTITY c \"&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;\" > " +
+            "<!ENTITY d \"&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;\" > " +
+            "<!ENTITY e \"&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;\" > " +
+            "<!ENTITY f \"&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;\" > " +
+            "<!ENTITY g \"&f;&f;&f;&f;&f;&f;&f;&f;&f;&f;\" > " +
+            "<!ENTITY h \"&g;&g;&g;&g;&g;&g;&g;&g;&g;&g;\" > " +
+            "<!ENTITY i \"&h;&h;&h;&h;&h;&h;&h;&h;&h;&h;\" > " +
+            "<!ENTITY j \"&i;&i;&i;&i;&i;&i;&i;&i;&i;&i;\" > " +
+            "<!ENTITY k \"&j;&j;&j;&j;&j;&j;&j;&j;&j;&j;\" > " +
+            "<!ENTITY l \"&k;&k;&k;&k;&k;&k;&k;&k;&k;&k;\" > " +
+            "<!ENTITY m \"&l;&l;&l;&l;&l;&l;&l;&l;&l;&l;\" > " +
+            "<!ENTITY n \"&m;&m;&m;&m;&m;&m;&m;&m;&m;&m;\" > " +
+            "<!ENTITY o \"&n;&n;&n;&n;&n;&n;&n;&n;&n;&n;\" > " +
+            "<!ENTITY p \"&o;&o;&o;&o;&o;&o;&o;&o;&o;&o;\" > " +
+            "<!ENTITY q \"&p;&p;&p;&p;&p;&p;&p;&p;&p;&p;\" > " +
+            "<!ENTITY r \"&q;&q;&q;&q;&q;&q;&q;&q;&q;&q;\" > " +
+            "<!ENTITY s \"&r;&r;&r;&r;&r;&r;&r;&r;&r;&r;\" > " +
+            "]> " +
+            "<kaboom>&s;</kaboom>").getBytes(StandardCharsets.UTF_8);
+
+    //a truly vulnerable parser, say xerces2, doesn't oom, it thrashes with gc,
+    //so a reasonable timeout is used to bound the run
+    @Test(timeout = 20000)
+    public void testInjectedXML() throws Exception {
+        parse("injected", new ByteArrayInputStream(bombedDocument()), new XMLTestBase.VulnerableSAXParser());
+    }
+
+    @Test(timeout = 20000)
+    public void testProtectedXML() throws Exception {
+        test("injected", bombedDocument(), new AutoDetectParser());
+    }
+
+    /**
+     * Builds the fixture shared by both tests: a minimal XML document with
+     * {@link #ENTITY_EXPANSION_BOMB} injected into it.
+     */
+    private static byte[] bombedDocument() throws Exception {
+        byte[] plain = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><document>blah</document>".getBytes(StandardCharsets.UTF_8);
+        return injectXML(plain, ENTITY_EXPANSION_BOMB);
+    }
+
+    /**
+     * Parses {@code bytes} with {@code parser} and requires the parse to be rejected
+     * with an exception whose message (or whose cause's message, for a
+     * {@link TikaException}) contains "entity expansions".
+     *
+     * @param testFileName label passed through to the shared parse helper
+     * @param bytes document to parse
+     * @param parser parser under test
+     * @throws RuntimeException if one of the expected exception types is thrown
+     *         without the expected message
+     */
+    private static void test(String testFileName, byte[] bytes, Parser parser) throws Exception {
+        try {
+            parse(testFileName, new ByteArrayInputStream(bytes), parser);
+        } catch (SAXParseException e) {
+            String msg = e.getMessage();
+            if (msg != null && msg.contains("entity expansions")) {
+                return;
+            }
+            throw new RuntimeException("Should have seen 'entity expansions' in the msg", e);
+        } catch (TikaException e) {
+            Throwable cause = e.getCause();
+            String causeMsg = (cause == null) ? null : cause.getMessage();
+            if (causeMsg != null && causeMsg.contains("entity expansions")) {
+                return;
+            }
+            throw new RuntimeException("Cause should have mentioned 'entity expansions'", e);
+        }
+        //reaching this point means the parse completed without any exception
+        fail("should have had an exception");
+    }
+}
diff --git a/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java b/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java
index 3dd394d..1e922b0 100644
--- a/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java
+++ b/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java
@@ -1,42 +1,26 @@
package org.apache.tika;
import org.apache.tika.config.TikaConfig;
-import org.apache.tika.exception.TikaException;
import org.apache.tika.io.IOUtils;
import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParserConfig;
-import org.apache.tika.sax.TaggedContentHandler;
-import org.apache.tika.sax.TextContentHandler;
import org.apache.tika.sax.ToHTMLContentHandler;
+import org.junit.Ignore;
import org.junit.Test;
import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-import org.xml.sax.helpers.DefaultHandler;
-import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
-import java.io.File;
import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
-import java.util.Collections;
-import java.util.Enumeration;
-import java.util.Set;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipFile;
-import java.util.zip.ZipOutputStream;
import static org.junit.Assert.fail;
@@ -45,15 +29,17 @@ import static org.junit.Assert.fail;
* It does not test for XXE prevention in files that may contain xml
* files, such as PDFs and other XMP-containing files.
*/
-public class TestXXEInXML extends TikaTest {
+@Ignore
+public class TestXXEInXML extends XMLTestBase {
//TODO: figure out how to test XFA and xmp in PDFs
- private static final String EVIL = "<!DOCTYPE roottag PUBLIC \"-//OXML/XXE/EN\" \"file:///couldnt_possibly_exist\">";
+ private static final byte[] XXE =
+ "<!DOCTYPE roottag PUBLIC \"-//OXML/XXE/EN\" \"file:///couldnt_possibly_exist\">".getBytes(StandardCharsets.UTF_8);
@Test
public void testConfirmVulnerable() throws Exception {
try {
- parse("testXXE.xml", getResourceAsStream("/test-documents/testXXE.xml"), new VulnerableXMLParser());
+ parse("testXXE.xml", getResourceAsStream("/test-documents/testXXE.xml"), new VulnerableSAXParser());
fail("should have failed!!!");
} catch (FileNotFoundException e) {
@@ -68,9 +54,9 @@ public class TestXXEInXML extends TikaTest {
@Test
public void testInjectedXML() throws Exception {
byte[] bytes = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><document>blah</document>".getBytes(StandardCharsets.UTF_8);
- byte[] injected = injectXML(bytes);
+ byte[] injected = injectXML(bytes, XXE);
try {
- parse("injected", new ByteArrayInputStream(injected), new VulnerableXMLParser());
+ parse("injected", new ByteArrayInputStream(injected), new VulnerableSAXParser());
fail("injected should have triggered xxe");
} catch (FileNotFoundException e) {
@@ -82,14 +68,14 @@ public class TestXXEInXML extends TikaTest {
InputStream is = getResourceAsStream("/test-documents/testWORD_2003ml.xml");
ByteArrayOutputStream bos = new ByteArrayOutputStream();
IOUtils.copy(is, bos);
- byte[] injected = injectXML(bos.toByteArray());
+ byte[] injected = injectXML(bos.toByteArray(), XXE);
parse("testWORD_2003ml.xml", new ByteArrayInputStream(injected), new AutoDetectParser());
is.close();
is = getResourceAsStream("/test-documents/testWORD_2006ml.xml");
bos = new ByteArrayOutputStream();
IOUtils.copy(is, bos);
- injected = injectXML(bos.toByteArray());
+ injected = injectXML(bos.toByteArray(), XXE);
parse("testWORD_2006ml.xml", new ByteArrayInputStream(injected), new AutoDetectParser());
}
@@ -118,7 +104,7 @@ public class TestXXEInXML extends TikaTest {
private void _testOOXML(String fileName) throws Exception {
Path originalOOXML = getResourceAsFile("/test-documents/"+fileName).toPath();
- Path injected = injectZippedXMLs(originalOOXML, false);
+ Path injected = injectZippedXMLs(originalOOXML, XXE, false);
Parser p = new AutoDetectParser();
ContentHandler xhtml = new ToHTMLContentHandler();
@@ -143,7 +129,7 @@ public class TestXXEInXML extends TikaTest {
parseContext.set(OfficeParserConfig.class, officeParserConfig);
officeParserConfig.setUseSAXDocxExtractor(true);
officeParserConfig.setUseSAXPptxExtractor(true);
- injected = injectZippedXMLs(originalOOXML, true);
+ injected = injectZippedXMLs(originalOOXML, XXE, true);
p.parse(Files.newInputStream(injected), xhtml, metadata, parseContext);
} catch (FileNotFoundException e) {
@@ -159,7 +145,7 @@ public class TestXXEInXML extends TikaTest {
//handlePart
public void testDocxWithIncorrectSAXConfiguration() throws Exception {
Path originalDocx = getResourceAsFile("/test-documents/testWORD_macros.docm").toPath();
- Path injected = injectZippedXMLs(originalDocx, true);
+ Path injected = injectZippedXMLs(originalDocx, XXE,true);
Parser p = new AutoDetectParser();
ContentHandler xhtml = new ToHTMLContentHandler();
ParseContext parseContext = new ParseContext();
@@ -183,89 +169,13 @@ public class TestXXEInXML extends TikaTest {
getResourceAsStream("/org/apache/tika/config/TIKA-1558-blacklist.xml") ) {
ByteArrayOutputStream bos = new ByteArrayOutputStream();
IOUtils.copy(is, bos);
- byte[] injected = injectXML(bos.toByteArray());
+ byte[] injected = injectXML(bos.toByteArray(), XXE);
TikaConfig tikaConfig = new TikaConfig(new ByteArrayInputStream(injected));
}
}
- private Path injectZippedXMLs(Path original, boolean includeSlides) throws IOException {
- ZipFile input = new ZipFile(original.toFile());
- File output = Files.createTempFile("tika-xxe-", ".zip").toFile();
- ZipOutputStream outZip = new ZipOutputStream(new FileOutputStream(output));
- Enumeration<? extends ZipEntry> zipEntryEnumeration = input.entries();
- while (zipEntryEnumeration.hasMoreElements()) {
- ZipEntry entry = zipEntryEnumeration.nextElement();
- ByteArrayOutputStream bos = new ByteArrayOutputStream();
- IOUtils.copy(input.getInputStream(entry), bos);
- byte[] bytes = bos.toByteArray();
- if (entry.getName().endsWith(".xml") &&
- //don't inject the slides because you'll get a bean exception
- //Unexpected node
- (! includeSlides && ! entry.getName().contains("slides/slide"))) {
- bytes = injectXML(bytes);
- }
- ZipEntry outEntry = new ZipEntry(entry.getName());
- outZip.putNextEntry(outEntry);
- outZip.write(bytes);
- outZip.closeEntry();
- }
- outZip.flush();
- outZip.close();
-
- return output.toPath();
- }
-
- private byte[] injectXML(byte[] input) throws IOException {
-
- int startXML = -1;
- int endXML = -1;
- for (int i = 0; i < input.length; i++) {
- if (input[i] == '<' && i+1 < input.length && input[i+1] == '?') {
- startXML = i;
- }
- if (input[i] == '?' && i+1 < input.length && input[i+1] == '>') {
- endXML = i+1;
- break;
- }
- }
- ByteArrayOutputStream bos = new ByteArrayOutputStream();
- if (startXML > -1 && endXML > -1) {
- bos.write(input, startXML, endXML-startXML+1);
- }
- bos.write(EVIL.getBytes(StandardCharsets.UTF_8));
- bos.write(input, endXML+1, (input.length-endXML-1));
- return bos.toByteArray();
- }
-
- private void parse(String testFile, InputStream is, Parser parser) throws Exception {
- parser.parse(is, new DefaultHandler(), new Metadata(), new ParseContext());
- }
- private static class VulnerableXMLParser extends AbstractParser {
-
- @Override
- public Set<MediaType> getSupportedTypes(ParseContext context) {
- return Collections.singleton(MediaType.APPLICATION_XML);
- }
- @Override
- public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
-
- TaggedContentHandler tagged = new TaggedContentHandler(handler);
- try {
- SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
- SAXParser parser = saxParserFactory.newSAXParser();
- parser.parse( stream,
- new TextContentHandler(handler,
- true));
- } catch (SAXException e) {
- //there will be one...ignore it
- } catch (ParserConfigurationException e) {
- throw new TikaException("parser config ex", e);
- }
-
- }
- }
}
diff --git a/tika-parsers/src/test/java/org/apache/tika/XMLTestBase.java b/tika-parsers/src/test/java/org/apache/tika/XMLTestBase.java
new file mode 100644
index 0000000..5534ca9
--- /dev/null
+++ b/tika-parsers/src/test/java/org/apache/tika/XMLTestBase.java
@@ -0,0 +1,137 @@
+package org.apache.tika;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.TaggedContentHandler;
+import org.apache.tika.sax.TextContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.Set;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
+import java.util.zip.ZipOutputStream;
+
+/**
+ * Shared helpers for the XML vulnerability tests: payload injection into raw XML
+ * and into zipped containers, plus test parsers that configure no protective
+ * features on the underlying XML parser.
+ */
+public class XMLTestBase extends TikaTest {
+
+    /**
+     * Inserts {@code toInject} immediately after the first "?>" in {@code input},
+     * i.e. right after the XML declaration when there is one. When the input
+     * contains no "?>", the payload is placed in front of the unchanged input.
+     * All original bytes are preserved, including any that precede the declaration.
+     *
+     * @param input bytes of the original document
+     * @param toInject bytes to insert (for example a DOCTYPE declaration)
+     * @return a new array holding the original bytes with the payload inserted
+     * @throws IOException declared because {@code OutputStream.write(byte[])} declares it
+     */
+    static byte[] injectXML(byte[] input, byte[] toInject) throws IOException {
+        //offset just past the first "?>"; stays 0 when the input has none
+        int insertAt = 0;
+        for (int i = 0; i + 1 < input.length; i++) {
+            if (input[i] == '?' && input[i + 1] == '>') {
+                insertAt = i + 2;
+                break;
+            }
+        }
+        ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length + toInject.length);
+        bos.write(input, 0, insertAt);
+        bos.write(toInject);
+        bos.write(input, insertAt, input.length - insertAt);
+        return bos.toByteArray();
+    }
+
+    /**
+     * Copies the zip file at {@code original} into a new temp file, injecting
+     * {@code toInject} (via {@link #injectXML(byte[], byte[])}) into every entry
+     * whose name ends with ".xml".
+     *
+     * @param original zip-based file to copy
+     * @param toInject bytes to inject into the xml entries
+     * @param includeSlides if false, entries whose name contains "slides/slide"
+     *                      are copied unmodified; if true they are injected too
+     * @return path of the newly written temp file
+     * @throws IOException if reading the original or writing the copy fails
+     */
+    static Path injectZippedXMLs(Path original, byte[] toInject, boolean includeSlides) throws IOException {
+        File output = Files.createTempFile("tika-xxe-", ".zip").toFile();
+        //try-with-resources so the zip handles are released even if an entry fails
+        try (ZipFile input = new ZipFile(original.toFile());
+             ZipOutputStream outZip = new ZipOutputStream(new FileOutputStream(output))) {
+            Enumeration<? extends ZipEntry> entries = input.entries();
+            while (entries.hasMoreElements()) {
+                ZipEntry entry = entries.nextElement();
+                ByteArrayOutputStream bos = new ByteArrayOutputStream();
+                try (InputStream entryStream = input.getInputStream(entry)) {
+                    IOUtils.copy(entryStream, bos);
+                }
+                byte[] bytes = bos.toByteArray();
+                //injecting the slides can cause a bean exception (Unexpected node),
+                //so they are only touched when the caller asks for it
+                boolean isSlide = entry.getName().contains("slides/slide");
+                if (entry.getName().endsWith(".xml") && (includeSlides || !isSlide)) {
+                    bytes = injectXML(bytes, toInject);
+                }
+                outZip.putNextEntry(new ZipEntry(entry.getName()));
+                outZip.write(bytes);
+                outZip.closeEntry();
+            }
+        }
+        return output.toPath();
+    }
+
+    /**
+     * Test parser that feeds the stream to a JAXP SAX parser without setting any
+     * protective features on the factory.
+     */
+    static class VulnerableDOMParser extends AbstractParser {
+
+        @Override
+        public Set<MediaType> getSupportedTypes(ParseContext context) {
+            return Collections.singleton(MediaType.APPLICATION_XML);
+        }
+
+        @Override
+        public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
+            try {
+                //NOTE(review): "org.apache.xerces.parsers.SAXParser" looks like a parser class
+                //rather than a SAXParserFactory implementation, so this newInstance call is
+                //expected to fail; also no DOM API is used despite the class name -- confirm intent
+                SAXParserFactory saxParserFactory = SAXParserFactory.newInstance(
+                        "org.apache.xerces.parsers.SAXParser", this.getClass().getClassLoader());
+                SAXParser parser = saxParserFactory.newSAXParser();
+                parser.parse(stream, new TextContentHandler(handler, true));
+            } catch (ParserConfigurationException e) {
+                throw new TikaException("parser config ex", e);
+            }
+        }
+    }
+
+    /**
+     * Test parser that feeds the stream to a SAX parser obtained from
+     * {@code org.apache.xerces.jaxp.SAXParserFactoryImpl} without setting any
+     * protective features on the factory.
+     */
+    static class VulnerableSAXParser extends AbstractParser {
+
+        @Override
+        public Set<MediaType> getSupportedTypes(ParseContext context) {
+            return Collections.singleton(MediaType.APPLICATION_XML);
+        }
+
+        @Override
+        public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
+            try {
+                SAXParserFactory saxParserFactory = SAXParserFactory.newInstance(
+                        "org.apache.xerces.jaxp.SAXParserFactoryImpl", this.getClass().getClassLoader());
+                SAXParser parser = saxParserFactory.newSAXParser();
+                parser.parse(stream, new TextContentHandler(handler, true));
+            } catch (ParserConfigurationException e) {
+                throw new TikaException("parser config ex", e);
+            }
+        }
+    }
+
+    /**
+     * Runs {@code parser} over {@code is} with a no-op handler and fresh metadata
+     * and parse context, discarding all output.
+     *
+     * @param testFileName label for the input; not used by the parse itself
+     * @param is stream to parse
+     * @param parser parser to run
+     * @throws Exception whatever the parser throws
+     */
+    static void parse(String testFileName, InputStream is, Parser parser) throws Exception {
+        parser.parse(is, new DefaultHandler(), new Metadata(), new ParseContext());
+    }
+}
--
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.