You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/09/25 17:00:49 UTC

[tika] 01/02: TIKA-2727

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 86d4ba1e1806c8c386e913f6848b938f657df2c6
Author: TALLISON <ta...@apache.org>
AuthorDate: Tue Sep 25 12:57:04 2018 -0400

    TIKA-2727
---
 .../java/org/apache/tika/utils/XMLReaderUtils.java | 281 +++++++++++++++++----
 .../src/test/java/org/apache/tika/TestParsers.java |  14 +-
 .../org/apache/tika/TestXMLEntityExpansion.java    |  54 +++-
 .../test/java/org/apache/tika/TestXXEInXML.java    |  30 ++-
 .../src/test/java/org/apache/tika/XMLTestBase.java |   4 +-
 5 files changed, 321 insertions(+), 62 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java b/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
index 71e1f84..ec5ad00 100644
--- a/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
@@ -24,8 +24,6 @@ import org.w3c.dom.Document;
 import org.xml.sax.EntityResolver;
 import org.xml.sax.InputSource;
 import org.xml.sax.SAXException;
-import org.xml.sax.SAXNotRecognizedException;
-import org.xml.sax.SAXNotSupportedException;
 import org.xml.sax.XMLReader;
 import org.xml.sax.helpers.DefaultHandler;
 
@@ -51,6 +49,7 @@ import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 import java.util.logging.Level;
 import java.util.logging.Logger;
@@ -69,6 +68,7 @@ public class XMLReaderUtils implements Serializable {
 
     private static final Logger LOG = Logger.getLogger(XMLReaderUtils.class.getName());
 
+    private static final String XERCES_SECURITY_MANAGER = "org.apache.xerces.util.SecurityManager";
     /**
      * Default size for the pool of SAX Parsers
      * and the pool of DOM builders
@@ -104,9 +104,9 @@ public class XMLReaderUtils implements Serializable {
     private static final ReentrantReadWriteLock SAX_READ_WRITE_LOCK = new ReentrantReadWriteLock();
     private static final ReentrantReadWriteLock DOM_READ_WRITE_LOCK = new ReentrantReadWriteLock();
 
-    private static ArrayBlockingQueue<SAXParser> SAX_PARSERS = new ArrayBlockingQueue<>(POOL_SIZE);
-    private static ArrayBlockingQueue<DocumentBuilder> DOM_BUILDERS = new ArrayBlockingQueue<>(POOL_SIZE);
-
+    private static ArrayBlockingQueue<PoolSAXParser> SAX_PARSERS = new ArrayBlockingQueue<>(POOL_SIZE);
+    private static ArrayBlockingQueue<PoolDOMBuilder> DOM_BUILDERS = new ArrayBlockingQueue<>(POOL_SIZE);
+    private static final AtomicInteger POOL_GENERATION = new AtomicInteger();
     static {
         try {
             setPoolSize(POOL_SIZE);
@@ -177,7 +177,10 @@ public class XMLReaderUtils implements Serializable {
      * Make sure to wrap your handler in the {@link OfflineContentHandler} to
      * prevent XML External Entity attacks
      * </p>
-     *
+     * <p>
+     * If you call reset() on the parser, make sure to replace the
+     * SecurityManager which will be cleared by xerces2 on reset().
+     * </p>
      * @return SAX parser
      * @throws TikaException if a SAX parser could not be created
      * @see #getSAXParserFactory()
@@ -213,18 +216,11 @@ public class XMLReaderUtils implements Serializable {
         SAXParserFactory factory = SAXParserFactory.newInstance();
         factory.setNamespaceAware(true);
         factory.setValidating(false);
-        try {
-            factory.setFeature(
-                    XMLConstants.FEATURE_SECURE_PROCESSING, true);
-        } catch (ParserConfigurationException e) {
-        } catch (SAXNotSupportedException e) {
-        } catch (SAXNotRecognizedException e) {
-            // TIKA-271: Some XML parsers do not support the
-            // secure-processing feature, even though it's required by
-            // JAXP in Java 5. Ignoring the exception is fine here, as
-            // deployments without this feature are inherently vulnerable
-            // to XML denial-of-service attacks.
-        }
+        trySetSAXFeature(factory, XMLConstants.FEATURE_SECURE_PROCESSING, true);
+        trySetSAXFeature(factory, "http://xml.org/sax/features/external-general-entities", false);
+        trySetSAXFeature(factory, "http://xml.org/sax/features/external-parameter-entities", false);
+        trySetSAXFeature(factory, "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
+        trySetSAXFeature(factory, "http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
 
         return factory;
     }
@@ -298,6 +294,18 @@ public class XMLReaderUtils implements Serializable {
         return factory;
     }
 
+    private static void trySetSAXFeature(SAXParserFactory saxParserFactory, String feature, boolean enabled) {
+        try {
+            saxParserFactory.setFeature(feature, enabled);
+        } catch (SecurityException e) {
+            throw e;
+        } catch (Exception e) {
+            LOG.log(Level.WARNING, "SAX Feature unsupported: " + feature, e);
+        } catch (AbstractMethodError ame) {
+            LOG.log(Level.WARNING, "Cannot set SAX feature because outdated XML parser in classpath: " + feature, ame);
+        }
+    }
+
     private static void trySetSAXFeature(DocumentBuilderFactory documentBuilderFactory, String feature, boolean enabled) {
         try {
             documentBuilderFactory.setFeature(feature, enabled);
@@ -349,14 +357,18 @@ public class XMLReaderUtils implements Serializable {
      * @throws SAXException
      */
     public static Document buildDOM(InputStream is, ParseContext context) throws TikaException, IOException, SAXException {
-        DocumentBuilder builderFromContext = context.get(DocumentBuilder.class);
-        DocumentBuilder builder = (builderFromContext == null) ? acquireDOMBuilder() : builderFromContext;
+        DocumentBuilder builder = context.get(DocumentBuilder.class);
+        PoolDOMBuilder poolBuilder = null;
+        if (builder == null) {
+            poolBuilder = acquireDOMBuilder();
+            builder = poolBuilder.getDocumentBuilder();
+        }
 
         try {
             return builder.parse(is);
         } finally {
-            if (builderFromContext == null) {
-                releaseDOMBuilder(builder);
+            if (poolBuilder != null) {
+                releaseDOMBuilder(poolBuilder);
             }
         }
     }
@@ -388,9 +400,9 @@ public class XMLReaderUtils implements Serializable {
      * @throws SAXException
      */
     public static Document buildDOM(String uriString) throws TikaException, IOException, SAXException {
-        DocumentBuilder builder = acquireDOMBuilder();
+        PoolDOMBuilder builder = acquireDOMBuilder();
         try {
-            return builder.parse(uriString);
+            return builder.getDocumentBuilder().parse(uriString);
         } finally {
             releaseDOMBuilder(builder);
         }
@@ -407,9 +419,9 @@ public class XMLReaderUtils implements Serializable {
      * @throws SAXException
      */
     public static Document buildDOM(InputStream is) throws TikaException, IOException, SAXException {
-        DocumentBuilder builder = acquireDOMBuilder();
+        PoolDOMBuilder builder = acquireDOMBuilder();
         try {
-            return builder.parse(is);
+            return builder.getDocumentBuilder().parse(is);
         } finally {
             releaseDOMBuilder(builder);
         }
@@ -430,30 +442,34 @@ public class XMLReaderUtils implements Serializable {
      */
     public static void parseSAX(InputStream is, DefaultHandler contentHandler, ParseContext context)
             throws TikaException, IOException, SAXException {
-        SAXParser contextParser = context.get(SAXParser.class);
-        SAXParser parser = (contextParser == null) ? acquireSAXParser() : contextParser;
+        SAXParser saxParser = context.get(SAXParser.class);
+        PoolSAXParser poolSAXParser = null;
+        if (saxParser == null) {
+            poolSAXParser = acquireSAXParser();
+            saxParser = poolSAXParser.getSAXParser();
+        }
         try {
-            parser.parse(is, contentHandler);
+            saxParser.parse(is, contentHandler);
         } finally {
-            if (contextParser == null) {
-                releaseParser(parser);
+            if (poolSAXParser != null) {
+                releaseParser(poolSAXParser);
             }
         }
     }
 
     /**
      * Acquire a SAXParser from the pool.  Make sure to
-     * {@link #releaseParser(SAXParser)} in
+     * {@link #releaseDOMBuilder(PoolDOMBuilder)} in
      * a <code>finally</code> block every time you call this.
      *
-     * @return a SAXParser
+     * @return a DocumentBuilder
      * @throws TikaException
      */
-    private static DocumentBuilder acquireDOMBuilder()
+    private static PoolDOMBuilder acquireDOMBuilder()
             throws TikaException {
         int waiting = 0;
         while (true) {
-            DocumentBuilder builder = null;
+            PoolDOMBuilder builder = null;
             try {
                 DOM_READ_WRITE_LOCK.readLock().lock();
                 builder = DOM_BUILDERS.poll(100, TimeUnit.MILLISECONDS);
@@ -483,7 +499,10 @@ public class XMLReaderUtils implements Serializable {
      *
      * @param builder builder to return
      */
-    private static void releaseDOMBuilder(DocumentBuilder builder) {
+    private static void releaseDOMBuilder(PoolDOMBuilder builder) {
+        if (builder.getPoolGeneration() != POOL_GENERATION.get()) {
+            return;
+        }
         try {
             builder.reset();
         } catch (UnsupportedOperationException e) {
@@ -506,17 +525,17 @@ public class XMLReaderUtils implements Serializable {
 
     /**
      * Acquire a SAXParser from the pool.  Make sure to
-     * {@link #releaseParser(SAXParser)} in
+     * {@link #releaseParser(PoolSAXParser)} in
      * a <code>finally</code> block every time you call this.
      *
      * @return a SAXParser
      * @throws TikaException
      */
-    private static SAXParser acquireSAXParser()
+    private static PoolSAXParser acquireSAXParser()
             throws TikaException {
         int waiting = 0;
         while (true) {
-            SAXParser parser = null;
+            PoolSAXParser parser = null;
             try {
                 SAX_READ_WRITE_LOCK.readLock().lock();
                 parser = SAX_PARSERS.poll(100, TimeUnit.MILLISECONDS);
@@ -546,12 +565,13 @@ public class XMLReaderUtils implements Serializable {
      *
      * @param parser parser to return
      */
-    private static void releaseParser(SAXParser parser) {
-        try {
-            parser.reset();
-        } catch (UnsupportedOperationException e) {
-            //ignore
+    private static void releaseParser(PoolSAXParser parser) {
+        //if this is a different generation, don't put it back
+        //in the pool
+        if (parser.getGeneration() != POOL_GENERATION.get()) {
+            return;
         }
+        parser.reset();
         try {
             SAX_READ_WRITE_LOCK.readLock().lock();
             //if there are extra parsers (e.g. after a reset of the pool to a smaller size),
@@ -585,8 +605,13 @@ public class XMLReaderUtils implements Serializable {
             SAX_READ_WRITE_LOCK.writeLock().lock();
             SAX_PARSERS.clear();
             SAX_PARSERS = new ArrayBlockingQueue<>(poolSize);
+            int generation = POOL_GENERATION.incrementAndGet();
             for (int i = 0; i < poolSize; i++) {
-                SAX_PARSERS.offer(getSAXParser());
+                try {
+                    SAX_PARSERS.offer(buildPoolParser(generation, getSAXParserFactory().newSAXParser()));
+                } catch (SAXException|ParserConfigurationException e) {
+                    throw new TikaException("problem creating sax parser", e);
+                }
             }
 
         } finally {
@@ -597,7 +622,8 @@ public class XMLReaderUtils implements Serializable {
             DOM_BUILDERS.clear();
             DOM_BUILDERS = new ArrayBlockingQueue<>(poolSize);
             for (int i = 0; i < poolSize; i++) {
-                DOM_BUILDERS.offer(getDocumentBuilder());
+                DOM_BUILDERS.offer(
+                        new PoolDOMBuilder(POOL_GENERATION.get(), getDocumentBuilder()));
             }
         } finally {
             DOM_READ_WRITE_LOCK.writeLock().unlock();
@@ -657,8 +683,11 @@ public class XMLReaderUtils implements Serializable {
                 // Stop once one can be setup without error
                 return;
             } catch (ClassNotFoundException e) {
+                e.printStackTrace();
                 // continue without log, this is expected in some setups
-            } catch (Throwable e) {     // NOSONAR - also catch things like NoClassDefError here
+            } catch (Throwable e) {
+                e.printStackTrace();
+                // NOSONAR - also catch things like NoClassDefError here
                 // throttle the log somewhat as it can spam the log otherwise
                 if(System.currentTimeMillis() > LAST_LOG + TimeUnit.MINUTES.toMillis(5)) {
                     LOG.log(Level.WARNING, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e);
@@ -698,4 +727,162 @@ public class XMLReaderUtils implements Serializable {
     public static int getMaxEntityExpansions() {
         return MAX_ENTITY_EXPANSIONS;
     }
+
+    private static class PoolDOMBuilder {
+        private final int poolGeneration;
+        private final DocumentBuilder documentBuilder;
+
+        PoolDOMBuilder(int poolGeneration, DocumentBuilder documentBuilder) {
+            this.poolGeneration = poolGeneration;
+            this.documentBuilder = documentBuilder;
+        }
+
+        public int getPoolGeneration() {
+            return poolGeneration;
+        }
+
+        public DocumentBuilder getDocumentBuilder() {
+            return documentBuilder;
+        }
+
+        public void reset() {
+            documentBuilder.reset();
+            documentBuilder.setEntityResolver(IGNORING_SAX_ENTITY_RESOLVER);
+            documentBuilder.setErrorHandler(null);
+        }
+    }
+    private abstract static class PoolSAXParser {
+        final int poolGeneration;
+        final SAXParser saxParser;
+
+        PoolSAXParser(int poolGeneration, SAXParser saxParser) {
+            this.poolGeneration = poolGeneration;
+            this.saxParser = saxParser;
+        }
+
+        abstract void reset();
+
+        public int getGeneration() {
+            return poolGeneration;
+        }
+
+        public SAXParser getSAXParser() {
+            return saxParser;
+        }
+    }
+
+
+    private static PoolSAXParser buildPoolParser(int generation, SAXParser parser) {
+        boolean canReset = false;
+        try {
+            parser.reset();
+            canReset = true;
+        } catch (UnsupportedOperationException e) {
+            canReset = false;
+        }
+        boolean hasSecurityManager = false;
+        try {
+            Object mgr = Class.forName(XERCES_SECURITY_MANAGER).newInstance();
+            Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE);
+            setLimit.invoke(mgr, MAX_ENTITY_EXPANSIONS);
+
+            parser.setProperty("http://apache.org/xml/properties/security-manager", mgr);
+            hasSecurityManager = true;
+        } catch (SecurityException e) {
+            //don't swallow security exceptions
+            throw e;
+        } catch (ClassNotFoundException e) {
+            // continue without log, this is expected in some setups
+        } catch (Throwable e) {
+            // NOSONAR - also catch things like NoClassDefError here
+            // throttle the log somewhat as it can spam the log otherwise
+            if (System.currentTimeMillis() > LAST_LOG + TimeUnit.MINUTES.toMillis(5)) {
+                LOG.log(Level.WARNING, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e);
+                LAST_LOG = System.currentTimeMillis();
+            }
+        }
+
+        boolean canSetJaxPEntity = false;
+        if (!hasSecurityManager) {
+            // use the builtin way of setting the property
+            try {
+                parser.setProperty("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit", MAX_ENTITY_EXPANSIONS);
+                canSetJaxPEntity = true;
+            } catch (SAXException e) {     // NOSONAR - also catch things like NoClassDefError here
+                // throttle the log somewhat as it can spam the log otherwise
+                if (System.currentTimeMillis() > LAST_LOG + TimeUnit.MINUTES.toMillis(5)) {
+                    LOG.log(Level.WARNING, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e);
+                    LAST_LOG = System.currentTimeMillis();
+                }
+            }
+        }
+
+        if (!canReset && hasSecurityManager) {
+            return new XercesPoolSAXParser(generation, parser);
+        } else if (canReset && hasSecurityManager) {
+            return new Xerces2PoolSAXParser(generation, parser);
+        } else if (canReset && !hasSecurityManager && canSetJaxPEntity) {
+            return new BuiltInPoolSAXParser(generation, parser);
+        } else {
+            return new UnrecognizedPoolSAXParser(generation, parser);
+        }
+
+    }
+
+
+    private static class XercesPoolSAXParser extends PoolSAXParser {
+        public XercesPoolSAXParser(int generation, SAXParser parser) {
+            super(generation, parser);
+        }
+
+        @Override
+        public void reset() {
+            //don't do anything
+        }
+    }
+
+    private static class Xerces2PoolSAXParser extends PoolSAXParser {
+        public Xerces2PoolSAXParser(int generation, SAXParser parser) {
+            super(generation, parser);
+        }
+
+        @Override
+        void reset() {
+            try {
+                Object object = saxParser.getProperty(XERCES_SECURITY_MANAGER);
+                saxParser.reset();
+                saxParser.setProperty(XERCES_SECURITY_MANAGER, object);
+            } catch (SAXException e) {
+            }
+        }
+    }
+
+    private static class BuiltInPoolSAXParser extends PoolSAXParser {
+        public BuiltInPoolSAXParser(int generation, SAXParser parser) {
+            super(generation, parser);
+        }
+
+        @Override
+        void reset() {
+            saxParser.reset();
+        }
+    }
+
+    private static class UnrecognizedPoolSAXParser extends PoolSAXParser {
+        //if unrecognized, try to set all protections
+        //and try to reset every time
+        public UnrecognizedPoolSAXParser(int generation, SAXParser parser) {
+            super(generation, parser);
+        }
+
+        @Override
+        void reset() {
+            try {
+                saxParser.reset();
+            } catch (UnsupportedOperationException e) {
+
+            }
+            trySetXercesSecurityManager(saxParser);
+        }
+    }
 }
diff --git a/tika-parsers/src/test/java/org/apache/tika/TestParsers.java b/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
index e359fbc..50cb67f 100644
--- a/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
+++ b/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
@@ -20,12 +20,14 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
 import java.io.File;
+import java.io.FileFilter;
 import java.io.FileInputStream;
 import java.io.InputStream;
 
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.junit.Before;
@@ -113,6 +115,16 @@ public class TestParsers extends MultiThreadedTikaTest {
     @Ignore("ignore for regular builds; run occasionally")
     public void testAllMultiThreaded() throws Exception {
         //this runs against all files in /test-documents
-        //testMultiThreaded(10, 100, null);
+        Parser p = new AutoDetectParser();
+        ParseContext[] contexts = new ParseContext[10];
+        for (int i = 0; i < 10; i++) {
+             contexts[i] = new ParseContext();
+        }
+        testMultiThreaded(p, contexts, 10, 100, new FileFilter() {
+            @Override
+            public boolean accept(File pathname) {
+                return true;
+            }
+        });
     }
 }
diff --git a/tika-parsers/src/test/java/org/apache/tika/TestXMLEntityExpansion.java b/tika-parsers/src/test/java/org/apache/tika/TestXMLEntityExpansion.java
index 54e3a7c..decb3f5 100644
--- a/tika-parsers/src/test/java/org/apache/tika/TestXMLEntityExpansion.java
+++ b/tika-parsers/src/test/java/org/apache/tika/TestXMLEntityExpansion.java
@@ -18,20 +18,25 @@ package org.apache.tika;
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
+import org.apache.tika.utils.XMLReaderUtils;
 import org.junit.Ignore;
 import org.junit.Test;
 import org.xml.sax.SAXParseException;
 
+import javax.xml.XMLConstants;
+import javax.xml.parsers.SAXParserFactory;
 import java.io.ByteArrayInputStream;
 import java.nio.charset.StandardCharsets;
 
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 /**
  * Tests to confirm defenses against entity expansion attacks.
  */
-@Ignore("initial draft, needs more work")
+
 public class TestXMLEntityExpansion extends XMLTestBase {
 
     private static final byte[] ENTITY_EXPANSION_BOMB = new String(
@@ -60,6 +65,7 @@ public class TestXMLEntityExpansion extends XMLTestBase {
 
     //Set a reasonable amount of time as the timeout
     //Make sure that the test apparatus actually works.
+    @Ignore
     @Test(timeout = 20000)
     public void testVulnerableParser() throws Exception {
         byte[] bytes = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><document>blah</document>".getBytes(StandardCharsets.UTF_8);
@@ -69,7 +75,7 @@ public class TestXMLEntityExpansion extends XMLTestBase {
             @Override
             public void run() {
                 try {
-                    parse("injected", new ByteArrayInputStream(injected), new XMLTestBase.VulnerableSAXParser());
+                    parse("injected", new ByteArrayInputStream(injected), new XMLTestBase.VulnerableSAXParser(), new ParseContext());
                 } catch (Exception e) {
                     throw new RuntimeException(e);
                 }
@@ -82,17 +88,53 @@ public class TestXMLEntityExpansion extends XMLTestBase {
 
     }
 
-    @Test(timeout = 20000)//
+    @Test(timeout = 30000)//
     public void testProtectedXML() throws Exception {
         byte[] bytes = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><document>blah</document>".getBytes(StandardCharsets.UTF_8);
         byte[] injected = injectXML(bytes, ENTITY_EXPANSION_BOMB);
-        test("injected", injected, new AutoDetectParser());
+        Parser p = new AutoDetectParser();
+        ParseContext context = new ParseContext();
+        for (int i = 0; i < XMLReaderUtils.getPoolSize()*2; i++) {
+            test("default", injected, p, context);
+        }
+        context.set(SAXParserFactory.class, XMLReaderUtils.getSAXParserFactory());
+        for (int i = 0; i < XMLReaderUtils.getPoolSize()*2; i++) {
+            test("default sax", injected, p, context);
+        }
+        String provider =
+                "com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl";
+        // create a new SAXParserFactory
+        SAXParserFactory factory = null;
+        try {
+            factory = SAXParserFactory.newInstance(provider, null);
+        } catch (Exception e) {
+            return;
+        }
+        factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+        context.set(SAXParserFactory.class, factory);
+        for (int i = 0; i < XMLReaderUtils.getPoolSize()*2; i++) {
+            test("built-in SAX", injected, p, context);
+        }
+    }
+
+    @Test
+    public void testDOM() throws Exception {
+        byte[] bytes = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><document>blah</document>".getBytes(StandardCharsets.UTF_8);
+        byte[] injected = injectXML(bytes, ENTITY_EXPANSION_BOMB);
+        for (int i = 0; i < XMLReaderUtils.getPoolSize()*6; i++) {
+            try {
+                XMLReaderUtils.buildDOM(new ByteArrayInputStream(injected));
+                fail("should never parse!");
+            } catch (SAXParseException e) {
+                assertTrue(e.getMessage() != null && e.getMessage().contains("entity expansions"));
+            }
+        }
     }
 
-    private static void test(String testFileName, byte[] bytes, Parser parser) throws Exception {
+    private static void test(String testFileName, byte[] bytes, Parser parser, ParseContext context) throws Exception {
         boolean ex = false;
         try {
-            parse(testFileName, new ByteArrayInputStream(bytes), parser);
+            parse(testFileName, new ByteArrayInputStream(bytes), parser, context);
         } catch (SAXParseException e) {
             if (e.getMessage() == null ||
                     ! e.getMessage().contains("entity expansions")) {
diff --git a/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java b/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java
index 6a84edf..5fd3dca 100644
--- a/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java
+++ b/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java
@@ -24,9 +24,11 @@ import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.microsoft.OfficeParserConfig;
 import org.apache.tika.sax.ToHTMLContentHandler;
+import org.apache.tika.utils.XMLReaderUtils;
 import org.junit.Ignore;
 import org.junit.Test;
 import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
 
 import javax.xml.parsers.SAXParser;
 import javax.xml.parsers.SAXParserFactory;
@@ -45,7 +47,6 @@ import static org.junit.Assert.fail;
  * It does not test for XXE prevention in files that may contain xml
  * files, such as PDFs and other XMP-containing files.
  */
-@Ignore
 public class TestXXEInXML extends XMLTestBase {
     //TODO: figure out how to test XFA and xmp in PDFs
 
@@ -53,9 +54,12 @@ public class TestXXEInXML extends XMLTestBase {
             "<!DOCTYPE roottag PUBLIC \"-//OXML/XXE/EN\" \"file:///couldnt_possibly_exist\">".getBytes(StandardCharsets.UTF_8);
 
     @Test
+    @Ignore("ignore vulnerable tests")
     public void testConfirmVulnerable() throws Exception {
         try {
-            parse("testXXE.xml", getResourceAsStream("/test-documents/testXXE.xml"), new VulnerableSAXParser());
+            parse("testXXE.xml",
+                    getResourceAsStream("/test-documents/testXXE.xml"),
+                    new VulnerableSAXParser(), new ParseContext());
             fail("should have failed!!!");
         } catch (FileNotFoundException e) {
 
@@ -64,7 +68,8 @@ public class TestXXEInXML extends XMLTestBase {
 
     @Test
     public void testXML() throws Exception {
-        parse("testXXE.xml", getResourceAsStream("/test-documents/testXXE.xml"), new AutoDetectParser());
+        parse("testXXE.xml", getResourceAsStream("/test-documents/testXXE.xml"),
+                new AutoDetectParser(), new ParseContext());
     }
 
     @Test
@@ -72,7 +77,9 @@ public class TestXXEInXML extends XMLTestBase {
         byte[] bytes = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><document>blah</document>".getBytes(StandardCharsets.UTF_8);
         byte[] injected = injectXML(bytes, XXE);
         try {
-            parse("injected", new ByteArrayInputStream(injected), new VulnerableSAXParser());
+            parse("injected",
+                    new ByteArrayInputStream(injected),
+                    new VulnerableSAXParser(), new ParseContext());
             fail("injected should have triggered xxe");
         } catch (FileNotFoundException e) {
 
@@ -85,14 +92,16 @@ public class TestXXEInXML extends XMLTestBase {
         ByteArrayOutputStream bos = new ByteArrayOutputStream();
         IOUtils.copy(is, bos);
         byte[] injected = injectXML(bos.toByteArray(), XXE);
-        parse("testWORD_2003ml.xml", new ByteArrayInputStream(injected), new AutoDetectParser());
+        parse("testWORD_2003ml.xml",
+                new ByteArrayInputStream(injected), new AutoDetectParser(), new ParseContext());
         is.close();
 
         is = getResourceAsStream("/test-documents/testWORD_2006ml.xml");
         bos = new ByteArrayOutputStream();
         IOUtils.copy(is, bos);
         injected = injectXML(bos.toByteArray(), XXE);
-        parse("testWORD_2006ml.xml", new ByteArrayInputStream(injected), new AutoDetectParser());
+        parse("testWORD_2006ml.xml", new ByteArrayInputStream(injected),
+                new AutoDetectParser(), new ParseContext());
     }
 
     @Test
@@ -156,6 +165,15 @@ public class TestXXEInXML extends XMLTestBase {
         }
     }
 
+    @Test
+    public void testDOM() throws Exception {
+        byte[] bytes = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><document>blah</document>".getBytes(StandardCharsets.UTF_8);
+        byte[] injected = injectXML(bytes, XXE);
+        for (int i = 0; i < XMLReaderUtils.getPoolSize()*2; i++) {
+            //this shouldn't throw an exception
+            XMLReaderUtils.buildDOM(new ByteArrayInputStream(injected), new ParseContext());
+        }
+    }
     //use this to confirm that this works
     //by manually turning off the SafeContentHandler in SXWPFWordExtractorDecorator's
     //handlePart
diff --git a/tika-parsers/src/test/java/org/apache/tika/XMLTestBase.java b/tika-parsers/src/test/java/org/apache/tika/XMLTestBase.java
index ad02eaf..aea7fa3 100644
--- a/tika-parsers/src/test/java/org/apache/tika/XMLTestBase.java
+++ b/tika-parsers/src/test/java/org/apache/tika/XMLTestBase.java
@@ -147,7 +147,7 @@ public class XMLTestBase extends TikaTest {
 
         }
     }
-    static void parse(String testFileName, InputStream is, Parser parser) throws Exception {
-        parser.parse(is, new DefaultHandler(), new Metadata(), new ParseContext());
+    static void parse(String testFileName, InputStream is, Parser parser, ParseContext context) throws Exception {
+        parser.parse(is, new DefaultHandler(), new Metadata(), context);
     }
 }