You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/09/25 17:00:49 UTC
[tika] 01/02: TIKA-2727
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 86d4ba1e1806c8c386e913f6848b938f657df2c6
Author: TALLISON <ta...@apache.org>
AuthorDate: Tue Sep 25 12:57:04 2018 -0400
TIKA-2727
---
.../java/org/apache/tika/utils/XMLReaderUtils.java | 281 +++++++++++++++++----
.../src/test/java/org/apache/tika/TestParsers.java | 14 +-
.../org/apache/tika/TestXMLEntityExpansion.java | 54 +++-
.../test/java/org/apache/tika/TestXXEInXML.java | 30 ++-
.../src/test/java/org/apache/tika/XMLTestBase.java | 4 +-
5 files changed, 321 insertions(+), 62 deletions(-)
diff --git a/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java b/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
index 71e1f84..ec5ad00 100644
--- a/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
@@ -24,8 +24,6 @@ import org.w3c.dom.Document;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
-import org.xml.sax.SAXNotRecognizedException;
-import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
@@ -51,6 +49,7 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -69,6 +68,7 @@ public class XMLReaderUtils implements Serializable {
private static final Logger LOG = Logger.getLogger(XMLReaderUtils.class.getName());
+ private static final String XERCES_SECURITY_MANAGER = "org.apache.xerces.util.SecurityManager";
/**
* Default size for the pool of SAX Parsers
* and the pool of DOM builders
@@ -104,9 +104,9 @@ public class XMLReaderUtils implements Serializable {
private static final ReentrantReadWriteLock SAX_READ_WRITE_LOCK = new ReentrantReadWriteLock();
private static final ReentrantReadWriteLock DOM_READ_WRITE_LOCK = new ReentrantReadWriteLock();
- private static ArrayBlockingQueue<SAXParser> SAX_PARSERS = new ArrayBlockingQueue<>(POOL_SIZE);
- private static ArrayBlockingQueue<DocumentBuilder> DOM_BUILDERS = new ArrayBlockingQueue<>(POOL_SIZE);
-
+ private static ArrayBlockingQueue<PoolSAXParser> SAX_PARSERS = new ArrayBlockingQueue<>(POOL_SIZE);
+ private static ArrayBlockingQueue<PoolDOMBuilder> DOM_BUILDERS = new ArrayBlockingQueue<>(POOL_SIZE);
+ private static final AtomicInteger POOL_GENERATION = new AtomicInteger();
static {
try {
setPoolSize(POOL_SIZE);
@@ -177,7 +177,10 @@ public class XMLReaderUtils implements Serializable {
* Make sure to wrap your handler in the {@link OfflineContentHandler} to
* prevent XML External Entity attacks
* </p>
- *
+ * <p>
+ * If you call reset() on the parser, make sure to replace the
+ * SecurityManager which will be cleared by xerces2 on reset().
+ * </p>
* @return SAX parser
* @throws TikaException if a SAX parser could not be created
* @see #getSAXParserFactory()
@@ -213,18 +216,11 @@ public class XMLReaderUtils implements Serializable {
SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setNamespaceAware(true);
factory.setValidating(false);
- try {
- factory.setFeature(
- XMLConstants.FEATURE_SECURE_PROCESSING, true);
- } catch (ParserConfigurationException e) {
- } catch (SAXNotSupportedException e) {
- } catch (SAXNotRecognizedException e) {
- // TIKA-271: Some XML parsers do not support the
- // secure-processing feature, even though it's required by
- // JAXP in Java 5. Ignoring the exception is fine here, as
- // deployments without this feature are inherently vulnerable
- // to XML denial-of-service attacks.
- }
+ trySetSAXFeature(factory, XMLConstants.FEATURE_SECURE_PROCESSING, true);
+ trySetSAXFeature(factory, "http://xml.org/sax/features/external-general-entities", false);
+ trySetSAXFeature(factory, "http://xml.org/sax/features/external-parameter-entities", false);
+ trySetSAXFeature(factory, "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
+ trySetSAXFeature(factory, "http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
return factory;
}
@@ -298,6 +294,18 @@ public class XMLReaderUtils implements Serializable {
return factory;
}
+ private static void trySetSAXFeature(SAXParserFactory saxParserFactory, String feature, boolean enabled) {
+ try {
+ saxParserFactory.setFeature(feature, enabled);
+ } catch (SecurityException e) {
+ throw e;
+ } catch (Exception e) {
+ LOG.log(Level.WARNING, "SAX Feature unsupported: " + feature, e);
+ } catch (AbstractMethodError ame) {
+ LOG.log(Level.WARNING, "Cannot set SAX feature because outdated XML parser in classpath: " + feature, ame);
+ }
+ }
+
private static void trySetSAXFeature(DocumentBuilderFactory documentBuilderFactory, String feature, boolean enabled) {
try {
documentBuilderFactory.setFeature(feature, enabled);
@@ -349,14 +357,18 @@ public class XMLReaderUtils implements Serializable {
* @throws SAXException
*/
public static Document buildDOM(InputStream is, ParseContext context) throws TikaException, IOException, SAXException {
- DocumentBuilder builderFromContext = context.get(DocumentBuilder.class);
- DocumentBuilder builder = (builderFromContext == null) ? acquireDOMBuilder() : builderFromContext;
+ DocumentBuilder builder = context.get(DocumentBuilder.class);
+ PoolDOMBuilder poolBuilder = null;
+ if (builder == null) {
+ poolBuilder = acquireDOMBuilder();
+ builder = poolBuilder.getDocumentBuilder();
+ }
try {
return builder.parse(is);
} finally {
- if (builderFromContext == null) {
- releaseDOMBuilder(builder);
+ if (poolBuilder != null) {
+ releaseDOMBuilder(poolBuilder);
}
}
}
@@ -388,9 +400,9 @@ public class XMLReaderUtils implements Serializable {
* @throws SAXException
*/
public static Document buildDOM(String uriString) throws TikaException, IOException, SAXException {
- DocumentBuilder builder = acquireDOMBuilder();
+ PoolDOMBuilder builder = acquireDOMBuilder();
try {
- return builder.parse(uriString);
+ return builder.getDocumentBuilder().parse(uriString);
} finally {
releaseDOMBuilder(builder);
}
@@ -407,9 +419,9 @@ public class XMLReaderUtils implements Serializable {
* @throws SAXException
*/
public static Document buildDOM(InputStream is) throws TikaException, IOException, SAXException {
- DocumentBuilder builder = acquireDOMBuilder();
+ PoolDOMBuilder builder = acquireDOMBuilder();
try {
- return builder.parse(is);
+ return builder.getDocumentBuilder().parse(is);
} finally {
releaseDOMBuilder(builder);
}
@@ -430,30 +442,34 @@ public class XMLReaderUtils implements Serializable {
*/
public static void parseSAX(InputStream is, DefaultHandler contentHandler, ParseContext context)
throws TikaException, IOException, SAXException {
- SAXParser contextParser = context.get(SAXParser.class);
- SAXParser parser = (contextParser == null) ? acquireSAXParser() : contextParser;
+ SAXParser saxParser = context.get(SAXParser.class);
+ PoolSAXParser poolSAXParser = null;
+ if (saxParser == null) {
+ poolSAXParser = acquireSAXParser();
+ saxParser = poolSAXParser.getSAXParser();
+ }
try {
- parser.parse(is, contentHandler);
+ saxParser.parse(is, contentHandler);
} finally {
- if (contextParser == null) {
- releaseParser(parser);
+ if (poolSAXParser != null) {
+ releaseParser(poolSAXParser);
}
}
}
/**
* Acquire a SAXParser from the pool. Make sure to
- * {@link #releaseParser(SAXParser)} in
+ * {@link #releaseDOMBuilder(PoolDOMBuilder)} in
* a <code>finally</code> block every time you call this.
*
- * @return a SAXParser
+ * @return a DocumentBuilder
* @throws TikaException
*/
- private static DocumentBuilder acquireDOMBuilder()
+ private static PoolDOMBuilder acquireDOMBuilder()
throws TikaException {
int waiting = 0;
while (true) {
- DocumentBuilder builder = null;
+ PoolDOMBuilder builder = null;
try {
DOM_READ_WRITE_LOCK.readLock().lock();
builder = DOM_BUILDERS.poll(100, TimeUnit.MILLISECONDS);
@@ -483,7 +499,10 @@ public class XMLReaderUtils implements Serializable {
*
* @param builder builder to return
*/
- private static void releaseDOMBuilder(DocumentBuilder builder) {
+ private static void releaseDOMBuilder(PoolDOMBuilder builder) {
+ if (builder.getPoolGeneration() != POOL_GENERATION.get()) {
+ return;
+ }
try {
builder.reset();
} catch (UnsupportedOperationException e) {
@@ -506,17 +525,17 @@ public class XMLReaderUtils implements Serializable {
/**
* Acquire a SAXParser from the pool. Make sure to
- * {@link #releaseParser(SAXParser)} in
+ * {@link #releaseParser(PoolSAXParser)} in
* a <code>finally</code> block every time you call this.
*
* @return a SAXParser
* @throws TikaException
*/
- private static SAXParser acquireSAXParser()
+ private static PoolSAXParser acquireSAXParser()
throws TikaException {
int waiting = 0;
while (true) {
- SAXParser parser = null;
+ PoolSAXParser parser = null;
try {
SAX_READ_WRITE_LOCK.readLock().lock();
parser = SAX_PARSERS.poll(100, TimeUnit.MILLISECONDS);
@@ -546,12 +565,13 @@ public class XMLReaderUtils implements Serializable {
*
* @param parser parser to return
*/
- private static void releaseParser(SAXParser parser) {
- try {
- parser.reset();
- } catch (UnsupportedOperationException e) {
- //ignore
+ private static void releaseParser(PoolSAXParser parser) {
+ //if this is a different generation, don't put it back
+ //in the pool
+ if (parser.getGeneration() != POOL_GENERATION.get()) {
+ return;
}
+ parser.reset();
try {
SAX_READ_WRITE_LOCK.readLock().lock();
//if there are extra parsers (e.g. after a reset of the pool to a smaller size),
@@ -585,8 +605,13 @@ public class XMLReaderUtils implements Serializable {
SAX_READ_WRITE_LOCK.writeLock().lock();
SAX_PARSERS.clear();
SAX_PARSERS = new ArrayBlockingQueue<>(poolSize);
+ int generation = POOL_GENERATION.incrementAndGet();
for (int i = 0; i < poolSize; i++) {
- SAX_PARSERS.offer(getSAXParser());
+ try {
+ SAX_PARSERS.offer(buildPoolParser(generation, getSAXParserFactory().newSAXParser()));
+ } catch (SAXException|ParserConfigurationException e) {
+ throw new TikaException("problem creating sax parser", e);
+ }
}
} finally {
@@ -597,7 +622,8 @@ public class XMLReaderUtils implements Serializable {
DOM_BUILDERS.clear();
DOM_BUILDERS = new ArrayBlockingQueue<>(poolSize);
for (int i = 0; i < poolSize; i++) {
- DOM_BUILDERS.offer(getDocumentBuilder());
+ DOM_BUILDERS.offer(
+ new PoolDOMBuilder(POOL_GENERATION.get(), getDocumentBuilder()));
}
} finally {
DOM_READ_WRITE_LOCK.writeLock().unlock();
@@ -657,8 +683,11 @@ public class XMLReaderUtils implements Serializable {
// Stop once one can be setup without error
return;
} catch (ClassNotFoundException e) {
+ e.printStackTrace();
// continue without log, this is expected in some setups
- } catch (Throwable e) { // NOSONAR - also catch things like NoClassDefError here
+ } catch (Throwable e) {
+ e.printStackTrace();
+ // NOSONAR - also catch things like NoClassDefError here
// throttle the log somewhat as it can spam the log otherwise
if(System.currentTimeMillis() > LAST_LOG + TimeUnit.MINUTES.toMillis(5)) {
LOG.log(Level.WARNING, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e);
@@ -698,4 +727,162 @@ public class XMLReaderUtils implements Serializable {
public static int getMaxEntityExpansions() {
return MAX_ENTITY_EXPANSIONS;
}
+
+ private static class PoolDOMBuilder {
+ private final int poolGeneration;
+ private final DocumentBuilder documentBuilder;
+
+ PoolDOMBuilder(int poolGeneration, DocumentBuilder documentBuilder) {
+ this.poolGeneration = poolGeneration;
+ this.documentBuilder = documentBuilder;
+ }
+
+ public int getPoolGeneration() {
+ return poolGeneration;
+ }
+
+ public DocumentBuilder getDocumentBuilder() {
+ return documentBuilder;
+ }
+
+ public void reset() {
+ documentBuilder.reset();
+ documentBuilder.setEntityResolver(IGNORING_SAX_ENTITY_RESOLVER);
+ documentBuilder.setErrorHandler(null);
+ }
+ }
+ private abstract static class PoolSAXParser {
+ final int poolGeneration;
+ final SAXParser saxParser;
+
+ PoolSAXParser(int poolGeneration, SAXParser saxParser) {
+ this.poolGeneration = poolGeneration;
+ this.saxParser = saxParser;
+ }
+
+ abstract void reset();
+
+ public int getGeneration() {
+ return poolGeneration;
+ }
+
+ public SAXParser getSAXParser() {
+ return saxParser;
+ }
+ }
+
+
+ private static PoolSAXParser buildPoolParser(int generation, SAXParser parser) {
+ boolean canReset = false;
+ try {
+ parser.reset();
+ canReset = true;
+ } catch (UnsupportedOperationException e) {
+ canReset = false;
+ }
+ boolean hasSecurityManager = false;
+ try {
+ Object mgr = Class.forName(XERCES_SECURITY_MANAGER).newInstance();
+ Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE);
+ setLimit.invoke(mgr, MAX_ENTITY_EXPANSIONS);
+
+ parser.setProperty("http://apache.org/xml/properties/security-manager", mgr);
+ hasSecurityManager = true;
+ } catch (SecurityException e) {
+ //don't swallow security exceptions
+ throw e;
+ } catch (ClassNotFoundException e) {
+ // continue without log, this is expected in some setups
+ } catch (Throwable e) {
+ // NOSONAR - also catch things like NoClassDefError here
+ // throttle the log somewhat as it can spam the log otherwise
+ if (System.currentTimeMillis() > LAST_LOG + TimeUnit.MINUTES.toMillis(5)) {
+ LOG.log(Level.WARNING, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e);
+ LAST_LOG = System.currentTimeMillis();
+ }
+ }
+
+ boolean canSetJaxPEntity = false;
+ if (!hasSecurityManager) {
+ // use the builtin way of setting the property
+ try {
+ parser.setProperty("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit", MAX_ENTITY_EXPANSIONS);
+ canSetJaxPEntity = true;
+ } catch (SAXException e) { // NOSONAR - also catch things like NoClassDefError here
+ // throttle the log somewhat as it can spam the log otherwise
+ if (System.currentTimeMillis() > LAST_LOG + TimeUnit.MINUTES.toMillis(5)) {
+ LOG.log(Level.WARNING, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e);
+ LAST_LOG = System.currentTimeMillis();
+ }
+ }
+ }
+
+ if (!canReset && hasSecurityManager) {
+ return new XercesPoolSAXParser(generation, parser);
+ } else if (canReset && hasSecurityManager) {
+ return new Xerces2PoolSAXParser(generation, parser);
+ } else if (canReset && !hasSecurityManager && canSetJaxPEntity) {
+ return new BuiltInPoolSAXParser(generation, parser);
+ } else {
+ return new UnrecognizedPoolSAXParser(generation, parser);
+ }
+
+ }
+
+
+ private static class XercesPoolSAXParser extends PoolSAXParser {
+ public XercesPoolSAXParser(int generation, SAXParser parser) {
+ super(generation, parser);
+ }
+
+ @Override
+ public void reset() {
+ //don't do anything
+ }
+ }
+
+ private static class Xerces2PoolSAXParser extends PoolSAXParser {
+ public Xerces2PoolSAXParser(int generation, SAXParser parser) {
+ super(generation, parser);
+ }
+
+ @Override
+ void reset() {
+ try {
+ Object object = saxParser.getProperty(XERCES_SECURITY_MANAGER);
+ saxParser.reset();
+ saxParser.setProperty(XERCES_SECURITY_MANAGER, object);
+ } catch (SAXException e) {
+ }
+ }
+ }
+
+ private static class BuiltInPoolSAXParser extends PoolSAXParser {
+ public BuiltInPoolSAXParser(int generation, SAXParser parser) {
+ super(generation, parser);
+ }
+
+ @Override
+ void reset() {
+ saxParser.reset();
+ }
+ }
+
+ private static class UnrecognizedPoolSAXParser extends PoolSAXParser {
+ //if unrecognized, try to set all protections
+ //and try to reset every time
+ public UnrecognizedPoolSAXParser(int generation, SAXParser parser) {
+ super(generation, parser);
+ }
+
+ @Override
+ void reset() {
+ try {
+ saxParser.reset();
+ } catch (UnsupportedOperationException e) {
+
+ }
+ trySetXercesSecurityManager(saxParser);
+ }
+ }
}
diff --git a/tika-parsers/src/test/java/org/apache/tika/TestParsers.java b/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
index e359fbc..50cb67f 100644
--- a/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
+++ b/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
@@ -20,12 +20,14 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.File;
+import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.junit.Before;
@@ -113,6 +115,16 @@ public class TestParsers extends MultiThreadedTikaTest {
@Ignore("ignore for regular builds; run occasionally")
public void testAllMultiThreaded() throws Exception {
//this runs against all files in /test-documents
- //testMultiThreaded(10, 100, null);
+ Parser p = new AutoDetectParser();
+ ParseContext[] contexts = new ParseContext[10];
+ for (int i = 0; i < 10; i++) {
+ contexts[i] = new ParseContext();
+ }
+ testMultiThreaded(p, contexts, 10, 100, new FileFilter() {
+ @Override
+ public boolean accept(File pathname) {
+ return true;
+ }
+ });
}
}
diff --git a/tika-parsers/src/test/java/org/apache/tika/TestXMLEntityExpansion.java b/tika-parsers/src/test/java/org/apache/tika/TestXMLEntityExpansion.java
index 54e3a7c..decb3f5 100644
--- a/tika-parsers/src/test/java/org/apache/tika/TestXMLEntityExpansion.java
+++ b/tika-parsers/src/test/java/org/apache/tika/TestXMLEntityExpansion.java
@@ -18,20 +18,25 @@ package org.apache.tika;
import org.apache.tika.exception.TikaException;
import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
+import org.apache.tika.utils.XMLReaderUtils;
import org.junit.Ignore;
import org.junit.Test;
import org.xml.sax.SAXParseException;
+import javax.xml.XMLConstants;
+import javax.xml.parsers.SAXParserFactory;
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
/**
* Tests to confirm defenses against entity expansion attacks.
*/
-@Ignore("initial draft, needs more work")
+
public class TestXMLEntityExpansion extends XMLTestBase {
private static final byte[] ENTITY_EXPANSION_BOMB = new String(
@@ -60,6 +65,7 @@ public class TestXMLEntityExpansion extends XMLTestBase {
//Set a reasonable amount of time as the timeout
//Make sure that the test apparatus actually works.
+ @Ignore
@Test(timeout = 20000)
public void testVulnerableParser() throws Exception {
byte[] bytes = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><document>blah</document>".getBytes(StandardCharsets.UTF_8);
@@ -69,7 +75,7 @@ public class TestXMLEntityExpansion extends XMLTestBase {
@Override
public void run() {
try {
- parse("injected", new ByteArrayInputStream(injected), new XMLTestBase.VulnerableSAXParser());
+ parse("injected", new ByteArrayInputStream(injected), new XMLTestBase.VulnerableSAXParser(), new ParseContext());
} catch (Exception e) {
throw new RuntimeException(e);
}
@@ -82,17 +88,53 @@ public class TestXMLEntityExpansion extends XMLTestBase {
}
- @Test(timeout = 20000)//
+ @Test(timeout = 30000)//
public void testProtectedXML() throws Exception {
byte[] bytes = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><document>blah</document>".getBytes(StandardCharsets.UTF_8);
byte[] injected = injectXML(bytes, ENTITY_EXPANSION_BOMB);
- test("injected", injected, new AutoDetectParser());
+ Parser p = new AutoDetectParser();
+ ParseContext context = new ParseContext();
+ for (int i = 0; i < XMLReaderUtils.getPoolSize()*2; i++) {
+ test("default", injected, p, context);
+ }
+ context.set(SAXParserFactory.class, XMLReaderUtils.getSAXParserFactory());
+ for (int i = 0; i < XMLReaderUtils.getPoolSize()*2; i++) {
+ test("default sax", injected, p, context);
+ }
+ String provider =
+ "com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl";
+ // create a new SAXParserFactory
+ SAXParserFactory factory = null;
+ try {
+ factory = SAXParserFactory.newInstance(provider, null);
+ } catch (Exception e) {
+ return;
+ }
+ factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+ context.set(SAXParserFactory.class, factory);
+ for (int i = 0; i < XMLReaderUtils.getPoolSize()*2; i++) {
+ test("built-in SAX", injected, p, context);
+ }
+ }
+
+ @Test
+ public void testDOM() throws Exception {
+ byte[] bytes = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><document>blah</document>".getBytes(StandardCharsets.UTF_8);
+ byte[] injected = injectXML(bytes, ENTITY_EXPANSION_BOMB);
+ for (int i = 0; i < XMLReaderUtils.getPoolSize()*6; i++) {
+ try {
+ XMLReaderUtils.buildDOM(new ByteArrayInputStream(injected));
+ fail("should never parse!");
+ } catch (SAXParseException e) {
+ assertTrue(e.getMessage() != null && e.getMessage().contains("entity expansions"));
+ }
+ }
}
- private static void test(String testFileName, byte[] bytes, Parser parser) throws Exception {
+ private static void test(String testFileName, byte[] bytes, Parser parser, ParseContext context) throws Exception {
boolean ex = false;
try {
- parse(testFileName, new ByteArrayInputStream(bytes), parser);
+ parse(testFileName, new ByteArrayInputStream(bytes), parser, context);
} catch (SAXParseException e) {
if (e.getMessage() == null ||
! e.getMessage().contains("entity expansions")) {
diff --git a/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java b/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java
index 6a84edf..5fd3dca 100644
--- a/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java
+++ b/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java
@@ -24,9 +24,11 @@ import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParserConfig;
import org.apache.tika.sax.ToHTMLContentHandler;
+import org.apache.tika.utils.XMLReaderUtils;
import org.junit.Ignore;
import org.junit.Test;
import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
@@ -45,7 +47,6 @@ import static org.junit.Assert.fail;
* It does not test for XXE prevention in files that may contain xml
* files, such as PDFs and other XMP-containing files.
*/
-@Ignore
public class TestXXEInXML extends XMLTestBase {
//TODO: figure out how to test XFA and xmp in PDFs
@@ -53,9 +54,12 @@ public class TestXXEInXML extends XMLTestBase {
"<!DOCTYPE roottag PUBLIC \"-//OXML/XXE/EN\" \"file:///couldnt_possibly_exist\">".getBytes(StandardCharsets.UTF_8);
@Test
+ @Ignore("ignore vulnerable tests")
public void testConfirmVulnerable() throws Exception {
try {
- parse("testXXE.xml", getResourceAsStream("/test-documents/testXXE.xml"), new VulnerableSAXParser());
+ parse("testXXE.xml",
+ getResourceAsStream("/test-documents/testXXE.xml"),
+ new VulnerableSAXParser(), new ParseContext());
fail("should have failed!!!");
} catch (FileNotFoundException e) {
@@ -64,7 +68,8 @@ public class TestXXEInXML extends XMLTestBase {
@Test
public void testXML() throws Exception {
- parse("testXXE.xml", getResourceAsStream("/test-documents/testXXE.xml"), new AutoDetectParser());
+ parse("testXXE.xml", getResourceAsStream("/test-documents/testXXE.xml"),
+ new AutoDetectParser(), new ParseContext());
}
@Test
@@ -72,7 +77,9 @@ public class TestXXEInXML extends XMLTestBase {
byte[] bytes = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><document>blah</document>".getBytes(StandardCharsets.UTF_8);
byte[] injected = injectXML(bytes, XXE);
try {
- parse("injected", new ByteArrayInputStream(injected), new VulnerableSAXParser());
+ parse("injected",
+ new ByteArrayInputStream(injected),
+ new VulnerableSAXParser(), new ParseContext());
fail("injected should have triggered xxe");
} catch (FileNotFoundException e) {
@@ -85,14 +92,16 @@ public class TestXXEInXML extends XMLTestBase {
ByteArrayOutputStream bos = new ByteArrayOutputStream();
IOUtils.copy(is, bos);
byte[] injected = injectXML(bos.toByteArray(), XXE);
- parse("testWORD_2003ml.xml", new ByteArrayInputStream(injected), new AutoDetectParser());
+ parse("testWORD_2003ml.xml",
+ new ByteArrayInputStream(injected), new AutoDetectParser(), new ParseContext());
is.close();
is = getResourceAsStream("/test-documents/testWORD_2006ml.xml");
bos = new ByteArrayOutputStream();
IOUtils.copy(is, bos);
injected = injectXML(bos.toByteArray(), XXE);
- parse("testWORD_2006ml.xml", new ByteArrayInputStream(injected), new AutoDetectParser());
+ parse("testWORD_2006ml.xml", new ByteArrayInputStream(injected),
+ new AutoDetectParser(), new ParseContext());
}
@Test
@@ -156,6 +165,15 @@ public class TestXXEInXML extends XMLTestBase {
}
}
+ @Test
+ public void testDOM() throws Exception {
+ byte[] bytes = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><document>blah</document>".getBytes(StandardCharsets.UTF_8);
+ byte[] injected = injectXML(bytes, XXE);
+ for (int i = 0; i < XMLReaderUtils.getPoolSize()*2; i++) {
+ //this shouldn't throw an exception
+ XMLReaderUtils.buildDOM(new ByteArrayInputStream(injected), new ParseContext());
+ }
+ }
//use this to confirm that this works
//by manually turning off the SafeContentHandler in SXWPFWordExtractorDecorator's
//handlePart
diff --git a/tika-parsers/src/test/java/org/apache/tika/XMLTestBase.java b/tika-parsers/src/test/java/org/apache/tika/XMLTestBase.java
index ad02eaf..aea7fa3 100644
--- a/tika-parsers/src/test/java/org/apache/tika/XMLTestBase.java
+++ b/tika-parsers/src/test/java/org/apache/tika/XMLTestBase.java
@@ -147,7 +147,7 @@ public class XMLTestBase extends TikaTest {
}
}
- static void parse(String testFileName, InputStream is, Parser parser) throws Exception {
- parser.parse(is, new DefaultHandler(), new Metadata(), new ParseContext());
+ static void parse(String testFileName, InputStream is, Parser parser, ParseContext context) throws Exception {
+ parser.parse(is, new DefaultHandler(), new Metadata(), context);
}
}