You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2017/09/14 01:42:00 UTC
[tika] branch master updated: TIKA-2465 -- add explicit unit tests
for xxe vulnerabilities
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/master by this push:
new f311188 TIKA-2465 -- add explicit unit tests for xxe vulnerabilities
f311188 is described below
commit f3111889082f796ee22cfe64955f0222d0f8b2c6
Author: tballison <ta...@mitre.org>
AuthorDate: Wed Sep 13 21:41:49 2017 -0400
TIKA-2465 -- add explicit unit tests for xxe vulnerabilities
---
.../test/java/org/apache/tika/TestXXEInXML.java | 242 +++++++++++++++++++++
.../src/test/resources/test-documents/testXXE.xml | 4 +
2 files changed, 246 insertions(+)
diff --git a/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java b/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java
new file mode 100644
index 0000000..78790d1
--- /dev/null
+++ b/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java
@@ -0,0 +1,242 @@
+package org.apache.tika;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.microsoft.OfficeParserConfig;
+import org.apache.tika.sax.TaggedContentHandler;
+import org.apache.tika.sax.TextContentHandler;
+import org.apache.tika.sax.ToHTMLContentHandler;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.Set;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
+import java.util.zip.ZipOutputStream;
+
+import static org.junit.Assert.fail;
+
+public class TestXXEInXML extends TikaTest {
+ //TODO: figure out how to test XFA and xmp in PDFs
+
+ //DOCTYPE declaration whose system identifier is a file: URL that is not expected to exist;
+ //injectXML() inserts it into otherwise benign XML
+ private static final String EVIL = "<!DOCTYPE roottag PUBLIC \"-//OXML/XXE/EN\" \"file:///couldnt_possibly_exist\">";
+
+ /**
+  * Sanity check for the test harness itself: parsing testXXE.xml with
+  * {@link VulnerableXMLParser} is expected to fail with a
+  * {@link FileNotFoundException}, i.e. the parser tried to fetch the
+  * external DTD named in the DOCTYPE.
+  */
+ @Test
+ public void testConfirmVulnerable() throws Exception {
+     //the test owns the resource stream, so close it once parsing is done
+     try (InputStream is = getResourceAsStream("/test-documents/testXXE.xml")) {
+         parse("testXXE.xml", is, new VulnerableXMLParser());
+         fail("should have failed!!!");
+     } catch (FileNotFoundException expected) {
+         //expected: the unprotected parser tried to open the non-existent DTD
+     }
+ }
+
+ /**
+  * Parses testXXE.xml with the {@link AutoDetectParser}; the test passes
+  * as long as parsing does not throw.
+  */
+ @Test
+ public void testXML() throws Exception {
+     //the test owns the resource stream, so close it once parsing is done
+     try (InputStream is = getResourceAsStream("/test-documents/testXXE.xml")) {
+         parse("testXXE.xml", is, new AutoDetectParser());
+     }
+ }
+
+ @Test
+ public void testInjectedXML() throws Exception {
+ byte[] bytes = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><document>blah</document>".getBytes(StandardCharsets.UTF_8);
+ byte[] injected = injectXML(bytes);
+ try {
+ parse("injected", new ByteArrayInputStream(injected), new VulnerableXMLParser());
+ fail("injected should have triggered xxe");
+ } catch (FileNotFoundException e) {
+
+ }
+ }
+
+ /**
+  * Injects the DOCTYPE into the Word 2003 and 2006 flat-XML test documents
+  * and parses each with the {@link AutoDetectParser}; the test passes as long
+  * as neither parse throws.
+  */
+ @Test
+ public void test2003_2006xml() throws Exception {
+     for (String fileName : new String[]{"testWORD_2003ml.xml", "testWORD_2006ml.xml"}) {
+         ByteArrayOutputStream bos = new ByteArrayOutputStream();
+         //close the resource stream even if the copy fails
+         try (InputStream is = getResourceAsStream("/test-documents/" + fileName)) {
+             IOUtils.copy(is, bos);
+         }
+         byte[] injected = injectXML(bos.toByteArray());
+         parse(fileName, new ByteArrayInputStream(injected), new AutoDetectParser());
+     }
+ }
+
+ /**
+  * Runs _testOOXML() over a fixed list of docx/docm/xlsx/xlsm/pptx/pptm test
+  * documents. The first file that causes an exception fails the test; the
+  * failure names the file and carries the original exception as its cause.
+  */
+ @Test
+ public void testOOXML() throws Exception {
+     for (String fileName : new String[]{
+             "testWORD.docx",
+             "testWORD_1img.docx",
+             "testWORD_2006ml.docx",
+             "testWORD_embedded_pics.docx",
+             "testWORD_macros.docm",
+             "testEXCEL_textbox.xlsx",
+             "testEXCEL_macro.xlsm",
+             "testEXCEL_phonetic.xlsx",
+             "testEXCEL_embeddedPDF_windows.xlsx",
+             "testPPT_2imgs.pptx",
+             "testPPT_comment.pptx",
+             "testPPT_embeddedPDF.pptx",
+             "testPPT_macros.pptm"
+     }) {
+         try {
+             _testOOXML(fileName);
+         } catch (Exception e) {
+             //keep the cause attached so the full stack trace shows up in the test report
+             throw new AssertionError("problem with: "+fileName + ": "+ e.getMessage(), e);
+         }
+     }
+ }
+
+ /**
+  * Injects the DOCTYPE into the given OOXML test document and parses the
+  * result twice with the {@link AutoDetectParser}: once with a freshly created
+  * {@link OfficeParserConfig}, then again after enabling the SAX docx and pptx
+  * extractors. The injected temp file is deleted afterwards.
+  *
+  * @param fileName name of a file under /test-documents/
+  * @throws Exception if injection or either parse fails
+  */
+ private void _testOOXML(String fileName) throws Exception {
+     Path originalOOXML = getResourceAsFile("/test-documents/"+fileName).toPath();
+     Path injected = injectOOXML(originalOOXML);
+     try {
+         Parser p = new AutoDetectParser();
+         ContentHandler xhtml = new ToHTMLContentHandler();
+         ParseContext parseContext = new ParseContext();
+         OfficeParserConfig officeParserConfig = new OfficeParserConfig();
+         parseContext.set(OfficeParserConfig.class, officeParserConfig);
+         //if the SafeContentHandler is turned off, this will throw an FNFE
+         //each stream is closed before the temp file is deleted below
+         try (InputStream is = Files.newInputStream(injected)) {
+             p.parse(is, xhtml, new Metadata(), parseContext);
+         }
+         officeParserConfig.setUseSAXDocxExtractor(true);
+         officeParserConfig.setUseSAXPptxExtractor(true);
+         try (InputStream is = Files.newInputStream(injected)) {
+             p.parse(is, xhtml, new Metadata(), parseContext);
+         }
+     } finally {
+         Files.delete(injected);
+     }
+ }
+
+ //use this to confirm that this works
+ //by manually turning off the SafeContentHandler in SXWPFWordExtractorDecorator's
+ //handlePart
+ //NOTE(review): not annotated with @Test, presumably so that it only runs when invoked by hand
+ public void testDocxWithIncorrectSAXConfiguration() throws Exception {
+     Path originalDocx = getResourceAsFile("/test-documents/testWORD_macros.docm").toPath();
+     Path injected = injectOOXML(originalDocx);
+     Parser p = new AutoDetectParser();
+     ContentHandler xhtml = new ToHTMLContentHandler();
+     ParseContext parseContext = new ParseContext();
+     OfficeParserConfig officeParserConfig = new OfficeParserConfig();
+     officeParserConfig.setUseSAXDocxExtractor(true);
+     parseContext.set(OfficeParserConfig.class, officeParserConfig);
+     //hand the parser a SAXParser built from a default factory
+     parseContext.set(SAXParser.class, SAXParserFactory.newInstance().newSAXParser());
+     //if the SafeContentHandler is turned off, this will throw an FNFE
+     try (InputStream is = Files.newInputStream(injected)) {
+         p.parse(is, xhtml, new Metadata(), parseContext);
+     }
+     //NOTE(review): the injected temp file is left on disk, as before (the delete was
+     //commented out), presumably so it can be inspected by hand; confirm
+ }
+
+ /**
+  * Copies the zip container at {@code original} into a new temp file, running
+  * injectXML() over every entry whose name ends in ".xml", except entries whose
+  * name contains "slides/slide".
+  *
+  * @param original path to an existing zip-based (OOXML) file
+  * @return path of the newly created temp file; the caller is responsible for deleting it
+  * @throws IOException if the original cannot be read or the copy cannot be written
+  */
+ private Path injectOOXML(Path original) throws IOException {
+     //try-with-resources closes the zip file, each entry stream and the output zip on failure too
+     try (ZipFile input = new ZipFile(original.toFile())) {
+         File output = Files.createTempFile("tika-xxe-", ".zip").toFile();
+         try (ZipOutputStream outZip = new ZipOutputStream(new FileOutputStream(output))) {
+             Enumeration<? extends ZipEntry> zipEntryEnumeration = input.entries();
+             while (zipEntryEnumeration.hasMoreElements()) {
+                 ZipEntry entry = zipEntryEnumeration.nextElement();
+                 ByteArrayOutputStream bos = new ByteArrayOutputStream();
+                 try (InputStream entryStream = input.getInputStream(entry)) {
+                     IOUtils.copy(entryStream, bos);
+                 }
+                 byte[] bytes = bos.toByteArray();
+                 if (entry.getName().endsWith(".xml") &&
+                         //don't inject the slides because you'll get a bean exception
+                         //Unexpected node
+                         ! entry.getName().contains("slides/slide")) {
+                     bytes = injectXML(bytes);
+                 }
+                 //a fresh entry carrying only the name
+                 ZipEntry outEntry = new ZipEntry(entry.getName());
+                 outZip.putNextEntry(outEntry);
+                 outZip.write(bytes);
+                 outZip.closeEntry();
+             }
+         }
+         return output.toPath();
+     }
+ }
+
+ /**
+  * Returns a copy of {@code input} with the EVIL DOCTYPE inserted.
+  * <p>
+  * The bytes are scanned up to the first "?&gt;". If a "&lt;?" was seen before
+  * it, the output is: the bytes from the last such "&lt;?" through that "?&gt;",
+  * then EVIL, then everything after the "?&gt;" (bytes before the "&lt;?" are not
+  * copied). Otherwise EVIL is simply prepended to the unchanged input.
+  *
+  * @param input raw bytes of an XML document
+  * @return the bytes with the DOCTYPE injected
+  * @throws IOException declared because of the stream writes
+  */
+ private byte[] injectXML(byte[] input) throws IOException {
+     int startXML = -1;
+     int endXML = -1;
+     //each check looks at input[i] and input[i+1], so stop one short of the end
+     for (int i = 0; i + 1 < input.length; i++) {
+         if (input[i] == '<' && input[i+1] == '?') {
+             startXML = i;
+         }
+         if (input[i] == '?' && input[i+1] == '>') {
+             endXML = i+1;
+             break;
+         }
+     }
+     ByteArrayOutputStream bos = new ByteArrayOutputStream();
+     //index of the first byte that goes after the injected DOCTYPE
+     int tailStart = 0;
+     if (startXML > -1 && endXML > -1) {
+         bos.write(input, startXML, endXML-startXML+1);
+         tailStart = endXML+1;
+     }
+     //a "?>" with no "<?" before it is not treated as a declaration: nothing is
+     //skipped, so no input bytes are lost in that case
+     bos.write(EVIL.getBytes(StandardCharsets.UTF_8));
+     bos.write(input, tailStart, input.length-tailStart);
+     return bos.toByteArray();
+ }
+
+ /**
+  * Runs {@code parser} over {@code is} with a no-op handler, fresh metadata and
+  * a fresh parse context. The stream is not closed here.
+  *
+  * @param testFile label for the input; not used by this method
+  * @param is stream to parse
+  * @param parser parser under test
+  * @throws Exception whatever the parser throws
+  */
+ private void parse(String testFile, InputStream is, Parser parser) throws Exception {
+     ContentHandler discardingHandler = new DefaultHandler();
+     Metadata freshMetadata = new Metadata();
+     ParseContext freshContext = new ParseContext();
+     parser.parse(is, discardingHandler, freshMetadata, freshContext);
+ }
+
+
+ /**
+  * Deliberately unprotected parser used to prove that the test payloads work:
+  * it parses with a {@link SAXParser} obtained from a default
+  * {@link SAXParserFactory}, with no additional configuration applied.
+  */
+ private static class VulnerableXMLParser extends AbstractParser {
+
+     @Override
+     public Set<MediaType> getSupportedTypes(ParseContext context) {
+         return Collections.singleton(MediaType.APPLICATION_XML);
+     }
+
+     /**
+      * Parses {@code stream} as XML, forwarding events to {@code handler} through a
+      * {@link TextContentHandler}. SAXExceptions are swallowed; IOExceptions
+      * (including FileNotFoundException) propagate to the caller.
+      */
+     @Override
+     public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
+         try {
+             SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();
+             saxParser.parse(stream, new TextContentHandler(handler, true));
+         } catch (SAXException ignored) {
+             //there will be one...ignore it
+         } catch (ParserConfigurationException e) {
+             throw new TikaException("parser config ex", e);
+         }
+     }
+ }
+
+}
diff --git a/tika-parsers/src/test/resources/test-documents/testXXE.xml b/tika-parsers/src/test/resources/test-documents/testXXE.xml
new file mode 100644
index 0000000..9b3b927
--- /dev/null
+++ b/tika-parsers/src/test/resources/test-documents/testXXE.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE roottag PUBLIC "-//OXML/XXE/EN" "file:///couldnt_possibly_exist">
+<roottag>
+</roottag>
\ No newline at end of file
--
To stop receiving notification emails like this one, please contact
['"commits@tika.apache.org" <co...@tika.apache.org>'].