You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2017/09/15 17:24:38 UTC

[tika] branch master updated (92fe9b8 -> c0c2eaf)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git.


    from 92fe9b8  Merge remote-tracking branch 'origin/master'
     new 1b951f2  improve docs for scope of these tests
     new c0c2eaf  TIKA-2467 refactor creation/configuration of XML parsers/factories/readers to be static methods.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../tika/batch/builders/BatchProcessBuilder.java   |   3 +-
 .../batch/builders/CommandLineParserBuilder.java   |   3 +-
 .../java/org/apache/tika/config/TikaConfig.java    |  17 +-
 .../apache/tika/config/TikaConfigSerializer.java   |   3 +-
 .../java/org/apache/tika/parser/ParseContext.java  | 105 ++---------
 .../external/ExternalParsersConfigReader.java      |   3 +-
 .../XMLReaderUtils.java}                           | 191 +++++++--------------
 .../java/org/apache/tika/eval/io/XMLLogReader.java |   3 +-
 .../apache/tika/eval/reports/ResultsReporter.java  |   3 +-
 .../tika/parser/image/ImageMetadataExtractor.java  |   3 +-
 .../tika/parser/image/xmp/JempboxExtractor.java    |   3 +-
 .../test/java/org/apache/tika/TestXXEInXML.java    |   5 +
 12 files changed, 100 insertions(+), 242 deletions(-)
 copy tika-core/src/main/java/org/apache/tika/{parser/ParseContext.java => utils/XMLReaderUtils.java} (58%)

-- 
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.

[tika] 01/02: improve docs for scope of these tests

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 1b951f262d3a552d63a7ac75074fff791ce9d0dd
Author: tballison <ta...@mitre.org>
AuthorDate: Fri Sep 15 11:45:25 2017 -0400

    improve docs for scope of these tests
---
 tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java b/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java
index 7f543d1..720c23a 100644
--- a/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java
+++ b/tika-parsers/src/test/java/org/apache/tika/TestXXEInXML.java
@@ -39,6 +39,11 @@ import java.util.zip.ZipOutputStream;
 
 import static org.junit.Assert.fail;
 
+/**
+ * This tests for XXE in basically xml type files, straight xml and ooxml.
+ * It does not test for XXE prevention in files that may contain xml
+ * files, such as PDFs and other XMP-containing files.
+ */
 public class TestXXEInXML extends TikaTest {
     //TODO: figure out how to test XFA and xmp in PDFs
 

-- 
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.

[tika] 02/02: TIKA-2467 refactor creation/configuration of XML parsers/factories/readers to be static methods.

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit c0c2eafe46224e5c316f2dede395308930a5ec0d
Author: tballison <ta...@mitre.org>
AuthorDate: Fri Sep 15 13:24:27 2017 -0400

    TIKA-2467 refactor creation/configuration of XML parsers/factories/readers
    to be static methods.
---
 .../tika/batch/builders/BatchProcessBuilder.java   |   3 +-
 .../batch/builders/CommandLineParserBuilder.java   |   3 +-
 .../java/org/apache/tika/config/TikaConfig.java    |  17 +-
 .../apache/tika/config/TikaConfigSerializer.java   |   3 +-
 .../java/org/apache/tika/parser/ParseContext.java  | 105 ++---------
 .../external/ExternalParsersConfigReader.java      |   3 +-
 .../XMLReaderUtils.java}                           | 191 +++++++--------------
 .../java/org/apache/tika/eval/io/XMLLogReader.java |   3 +-
 .../apache/tika/eval/reports/ResultsReporter.java  |   3 +-
 .../tika/parser/image/ImageMetadataExtractor.java  |   3 +-
 .../tika/parser/image/xmp/JempboxExtractor.java    |   3 +-
 11 files changed, 95 insertions(+), 242 deletions(-)

diff --git a/tika-batch/src/main/java/org/apache/tika/batch/builders/BatchProcessBuilder.java b/tika-batch/src/main/java/org/apache/tika/batch/builders/BatchProcessBuilder.java
index df5e4bf..314ea76 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/builders/BatchProcessBuilder.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/builders/BatchProcessBuilder.java
@@ -36,6 +36,7 @@ import org.apache.tika.io.IOExceptionWithCause;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.util.ClassLoaderUtil;
 import org.apache.tika.util.XMLDOMUtil;
+import org.apache.tika.utils.XMLReaderUtils;
 import org.w3c.dom.Document;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
@@ -67,7 +68,7 @@ public class BatchProcessBuilder {
     public BatchProcess build(InputStream is, Map<String,String> runtimeAttributes) throws IOException {
         Document doc = null;
         try {
-            DocumentBuilder docBuilder = new ParseContext().getDocumentBuilder();
+            DocumentBuilder docBuilder = XMLReaderUtils.getDocumentBuilder();
             doc = docBuilder.parse(is);
         } catch (TikaException|SAXException e) {
             throw new IOExceptionWithCause(e);
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/builders/CommandLineParserBuilder.java b/tika-batch/src/main/java/org/apache/tika/batch/builders/CommandLineParserBuilder.java
index 66b55bf..e58f163 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/builders/CommandLineParserBuilder.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/builders/CommandLineParserBuilder.java
@@ -27,6 +27,7 @@ import org.apache.commons.cli.Options;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.IOExceptionWithCause;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.utils.XMLReaderUtils;
 import org.w3c.dom.Document;
 import org.w3c.dom.NamedNodeMap;
 import org.w3c.dom.Node;
@@ -43,7 +44,7 @@ public class CommandLineParserBuilder {
     public Options build(InputStream is) throws IOException {
         Document doc = null;
         try {
-            DocumentBuilder docBuilder = new ParseContext().getDocumentBuilder();
+            DocumentBuilder docBuilder = XMLReaderUtils.getDocumentBuilder();
             doc = docBuilder.parse(is);
         } catch (TikaException|SAXException e) {
             throw new IOExceptionWithCause(e);
diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
index 8ca0d6b..28ac235 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
@@ -65,6 +65,7 @@ import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.ParserDecorator;
 import org.apache.tika.utils.AnnotationUtils;
+import org.apache.tika.utils.XMLReaderUtils;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
@@ -127,7 +128,7 @@ public class TikaConfig {
     }
     public TikaConfig(Path path, ServiceLoader loader)
             throws TikaException, IOException, SAXException {
-        this(getBuilder().parse(path.toFile()), loader);
+        this(XMLReaderUtils.getDocumentBuilder().parse(path.toFile()), loader);
     }
 
     public TikaConfig(File file)
@@ -136,7 +137,7 @@ public class TikaConfig {
     }
     public TikaConfig(File file, ServiceLoader loader)
             throws TikaException, IOException, SAXException {
-        this(getBuilder().parse(file), loader);
+        this(XMLReaderUtils.getDocumentBuilder().parse(file), loader);
     }
 
     public TikaConfig(URL url)
@@ -145,16 +146,16 @@ public class TikaConfig {
     }
     public TikaConfig(URL url, ClassLoader loader)
             throws TikaException, IOException, SAXException {
-        this(getBuilder().parse(url.toString()).getDocumentElement(), loader);
+        this(XMLReaderUtils.getDocumentBuilder().parse(url.toString()).getDocumentElement(), loader);
     }
     public TikaConfig(URL url, ServiceLoader loader)
             throws TikaException, IOException, SAXException {
-        this(getBuilder().parse(url.toString()).getDocumentElement(), loader);
+        this(XMLReaderUtils.getDocumentBuilder().parse(url.toString()).getDocumentElement(), loader);
     }
 
     public TikaConfig(InputStream stream)
             throws TikaException, IOException, SAXException {
-        this(getBuilder().parse(stream));
+        this(XMLReaderUtils.getDocumentBuilder().parse(stream));
     }
 
     public TikaConfig(Document document) throws TikaException, IOException {
@@ -250,7 +251,7 @@ public class TikaConfig {
         } else {
             ServiceLoader tmpServiceLoader = new ServiceLoader();
             try (InputStream stream = getConfigInputStream(config, tmpServiceLoader)) {
-                Element element = getBuilder().parse(stream).getDocumentElement();
+                Element element = XMLReaderUtils.getDocumentBuilder().parse(stream).getDocumentElement();
                 serviceLoader = serviceLoaderFromDomElement(element, tmpServiceLoader.getLoader());
                 DetectorXmlLoader detectorLoader = new DetectorXmlLoader();
                 EncodingDetectorXmlLoader encodingDetectorLoader = new EncodingDetectorXmlLoader();
@@ -390,10 +391,6 @@ public class TikaConfig {
         }
     }
 
-    private static DocumentBuilder getBuilder() throws TikaException {
-        return new ParseContext().getDocumentBuilder();
-    }
-
     private static Element getChild(Element element, String name) {
         Node child = element.getFirstChild();
         while (child != null) {
diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
index 3a2249e..c67b03b 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
@@ -45,6 +45,7 @@ import org.apache.tika.parser.DefaultParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.ParserDecorator;
+import org.apache.tika.utils.XMLReaderUtils;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
@@ -74,7 +75,7 @@ public class TikaConfigSerializer {
      */
     public static void serialize(TikaConfig config, Mode mode, Writer writer, Charset charset)
             throws Exception {
-        DocumentBuilder docBuilder = new ParseContext().getDocumentBuilder();
+        DocumentBuilder docBuilder = XMLReaderUtils.getDocumentBuilder();
 
         // root elements
         Document doc = docBuilder.newDocument();
diff --git a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
index 2521cc9..332efcd 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
@@ -23,19 +23,12 @@ import javax.xml.parsers.ParserConfigurationException;
 import javax.xml.parsers.SAXParser;
 import javax.xml.parsers.SAXParserFactory;
 import javax.xml.stream.XMLInputFactory;
-import javax.xml.stream.XMLResolver;
-import javax.xml.stream.XMLStreamException;
-import java.io.IOException;
 import java.io.Serializable;
-import java.io.StringReader;
-import java.lang.reflect.Method;
 import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.tika.exception.TikaException;
-import org.xml.sax.EntityResolver;
-import org.xml.sax.InputSource;
-import org.xml.sax.SAXException;
+import org.apache.tika.utils.XMLReaderUtils;
 import org.xml.sax.SAXNotRecognizedException;
 import org.xml.sax.SAXNotSupportedException;
 import org.xml.sax.XMLReader;
@@ -54,21 +47,6 @@ public class ParseContext implements Serializable {
     /** Map of objects in this context */
     private final Map<String, Object> context = new HashMap<String, Object>();
 
-    private static final EntityResolver IGNORING_SAX_ENTITY_RESOLVER = new EntityResolver() {
-        public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
-            return new InputSource(new StringReader(""));
-        }
-    };
-
-    private static final XMLResolver IGNORING_STAX_ENTITY_RESOLVER =
-            new XMLResolver() {
-                @Override
-                public Object resolveEntity(String publicID, String systemID, String baseURI, String namespace) throws
-                        XMLStreamException {
-                    return "";
-                }
-            };
-
     /**
      * Adds the given value to the context as an implementation of the given
      * interface.
@@ -129,13 +107,7 @@ public class ParseContext implements Serializable {
         if (reader != null) {
             return reader;
         }
-        try {
-            reader = getSAXParser().getXMLReader();
-        } catch (SAXException e) {
-            throw new TikaException("Unable to create an XMLReader", e);
-        }
-        reader.setEntityResolver(IGNORING_SAX_ENTITY_RESOLVER);
-        return reader;
+        return XMLReaderUtils.getXMLReader();
     }
 
     /**
@@ -153,13 +125,7 @@ public class ParseContext implements Serializable {
         if (parser != null) {
             return parser;
         } else {
-            try {
-                return getSAXParserFactory().newSAXParser();
-            } catch (ParserConfigurationException e) {
-                throw new TikaException("Unable to configure a SAX parser", e);
-            } catch (SAXException e) {
-                throw new TikaException("Unable to create a SAX parser", e);
-            }
+            return XMLReaderUtils.getSAXParser();
         }
     }
 
@@ -210,21 +176,16 @@ public class ParseContext implements Serializable {
         DocumentBuilderFactory documentBuilderFactory = get(DocumentBuilderFactory.class);
         if (documentBuilderFactory != null) {
             return documentBuilderFactory;
+        } else {
+            return XMLReaderUtils.getDocumentBuilderFactory();
         }
-        documentBuilderFactory = DocumentBuilderFactory.newInstance();
-        documentBuilderFactory.setNamespaceAware(true);
-        documentBuilderFactory.setValidating(false);
-        tryToSetSAXFeatureOnDOMFactory(documentBuilderFactory,
-            XMLConstants.FEATURE_SECURE_PROCESSING, true);
-        tryToSetXercesManager(documentBuilderFactory);
-        return documentBuilderFactory;
     }
 
     /**
      * Returns the DOM builder specified in this parsing context.
      * If a builder is not explicitly specified, then a builder
      * instance is created and returned. The builder instance is
-     * configured to apply an {@link #IGNORING_SAX_ENTITY_RESOLVER},
+     * configured to apply an {@link XMLReaderUtils#IGNORING_SAX_ENTITY_RESOLVER},
      * and it sets the ErrorHandler to <code>null</code>.
      *
      * @since Apache Tika 1.13
@@ -234,15 +195,8 @@ public class ParseContext implements Serializable {
         DocumentBuilder documentBuilder = get(DocumentBuilder.class);
         if (documentBuilder != null) {
             return documentBuilder;
-        }
-        try {
-            DocumentBuilderFactory documentBuilderFactory = getDocumentBuilderFactory();
-            documentBuilder = documentBuilderFactory.newDocumentBuilder();
-            documentBuilder.setEntityResolver(IGNORING_SAX_ENTITY_RESOLVER);
-            documentBuilder.setErrorHandler(null);
-            return documentBuilder;
-        } catch (ParserConfigurationException e) {
-            throw new TikaException("XML parser not available", e);
+        } else {
+            return XMLReaderUtils.getDocumentBuilder();
         }
     }
 
@@ -251,7 +205,7 @@ public class ParseContext implements Serializable {
      * If a factory is not explicitly specified, then a default factory
      * instance is created and returned. The default factory instance is
      * configured to be namespace-aware and to apply reasonable security
-     * using the {@link #IGNORING_STAX_ENTITY_RESOLVER}.
+     * using the {@link XMLReaderUtils#IGNORING_STAX_ENTITY_RESOLVER}.
      *
      * @since Apache Tika 1.13
      * @return StAX input factory
@@ -261,46 +215,7 @@ public class ParseContext implements Serializable {
         if (factory != null) {
             return factory;
         }
-        factory = XMLInputFactory.newFactory();
-
-        tryToSetStaxProperty(factory, XMLInputFactory.IS_NAMESPACE_AWARE, true);
-        tryToSetStaxProperty(factory, XMLInputFactory.IS_VALIDATING, false);
-
-        factory.setXMLResolver(IGNORING_STAX_ENTITY_RESOLVER);
-        return factory;
-    }
-
-    private static void tryToSetSAXFeatureOnDOMFactory(DocumentBuilderFactory dbf, String feature, boolean value) {
-        try {
-            dbf.setFeature(feature, value);
-        } catch (Exception|AbstractMethodError e) {
-        }
-    }
-
-    private static void tryToSetXercesManager(DocumentBuilderFactory dbf) {
-        // Try built-in JVM one first, standalone if not
-        for (String securityManagerClassName : new String[] {
-                "com.sun.org.apache.xerces.internal.util.SecurityManager",
-                "org.apache.xerces.util.SecurityManager"
-        }) {
-            try {
-                Object mgr = Class.forName(securityManagerClassName).newInstance();
-                Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE);
-                setLimit.invoke(mgr, 4096);
-                dbf.setAttribute("http://apache.org/xml/properties/security-manager", mgr);
-                // Stop once one can be setup without error
-                return;
-            } catch (Throwable t) {
-            }
-        }
-    }
-
-    private void tryToSetStaxProperty(XMLInputFactory factory, String key, boolean value) {
-        try {
-            factory.setProperty(key, value);
-        } catch (IllegalArgumentException e) {
-            //swallow
-        }
+        return XMLReaderUtils.getXMLInputFactory();
     }
 
 }
diff --git a/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParsersConfigReader.java b/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParsersConfigReader.java
index 2fd3cb5..1df02e2 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParsersConfigReader.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParsersConfigReader.java
@@ -32,6 +32,7 @@ import org.apache.tika.exception.TikaException;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.mime.MimeTypeException;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.utils.XMLReaderUtils;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
@@ -54,7 +55,7 @@ public final class ExternalParsersConfigReader implements ExternalParsersConfigR
    
    public static List<ExternalParser> read(InputStream stream) throws TikaException, IOException {
       try {
-          DocumentBuilder builder = new ParseContext().getDocumentBuilder();
+          DocumentBuilder builder = XMLReaderUtils.getDocumentBuilder();
           Document document = builder.parse(new InputSource(stream));
           return read(document);
       } catch (SAXException e) {
diff --git a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java b/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
similarity index 58%
copy from tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
copy to tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
index 2521cc9..6f24708 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
+++ b/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
@@ -1,20 +1,29 @@
-/*
+/**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.tika.parser;
+
+package org.apache.tika.utils;
+
+import org.apache.tika.exception.TikaException;
+import org.xml.sax.EntityResolver;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXNotRecognizedException;
+import org.xml.sax.SAXNotSupportedException;
+import org.xml.sax.XMLReader;
 
 import javax.xml.XMLConstants;
 import javax.xml.parsers.DocumentBuilder;
@@ -26,33 +35,15 @@ import javax.xml.stream.XMLInputFactory;
 import javax.xml.stream.XMLResolver;
 import javax.xml.stream.XMLStreamException;
 import java.io.IOException;
-import java.io.Serializable;
 import java.io.StringReader;
 import java.lang.reflect.Method;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.tika.exception.TikaException;
-import org.xml.sax.EntityResolver;
-import org.xml.sax.InputSource;
-import org.xml.sax.SAXException;
-import org.xml.sax.SAXNotRecognizedException;
-import org.xml.sax.SAXNotSupportedException;
-import org.xml.sax.XMLReader;
 
 /**
- * Parse context. Used to pass context information to Tika parsers.
- *
- * @since Apache Tika 0.5
- * @see <a href="https://issues.apache.org/jira/browse/TIKA-275">TIKA-275</a>
+ * Utility functions for reading XML.  If you are doing SAX parsing, make sure
+ * to use the {@link org.apache.tika.sax.OfflineContentHandler} to guard against
+ * XML External Entity attacks.
  */
-public class ParseContext implements Serializable {
-
-    /** Serial version UID. */
-    private static final long serialVersionUID = -5921436862145826534L;
-
-    /** Map of objects in this context */
-    private final Map<String, Object> context = new HashMap<String, Object>();
+public class XMLReaderUtils {
 
     private static final EntityResolver IGNORING_SAX_ENTITY_RESOLVER = new EntityResolver() {
         public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
@@ -70,51 +61,6 @@ public class ParseContext implements Serializable {
             };
 
     /**
-     * Adds the given value to the context as an implementation of the given
-     * interface.
-     *
-     * @param key the interface implemented by the given value
-     * @param value the value to be added, or <code>null</code> to remove
-     */
-    public <T> void set(Class<T> key, T value) {
-        if (value != null) {
-            context.put(key.getName(), value);
-        } else {
-            context.remove(key.getName());
-        }
-    }
-
-    /**
-     * Returns the object in this context that implements the given interface.
-     *
-     * @param key the interface implemented by the requested object
-     * @return the object that implements the given interface,
-     *         or <code>null</code> if not found
-     */
-    @SuppressWarnings("unchecked")
-    public <T> T get(Class<T> key) {
-        return (T) context.get(key.getName());
-    }
-
-    /**
-     * Returns the object in this context that implements the given interface,
-     * or the given default value if such an object is not found.
-     *
-     * @param key the interface implemented by the requested object
-     * @param defaultValue value to return if the requested object is not found
-     * @return the object that implements the given interface,
-     *         or the given default value if not found
-     */
-    public <T> T get(Class<T> key, T defaultValue) {
-        T value = get(key);
-        if (value != null) {
-            return value;
-        } else {
-            return defaultValue;
-        }
-    }
-
-    /**
      * Returns the XMLReader specified in this parsing context. If a reader
      * is not explicitly specified, then one is created using the specified
      * or the default SAX parser.
@@ -124,11 +70,8 @@ public class ParseContext implements Serializable {
      * @return XMLReader
      * @throws TikaException
      */
-    public XMLReader getXMLReader() throws TikaException {
-        XMLReader reader = get(XMLReader.class);
-        if (reader != null) {
-            return reader;
-        }
+    public static XMLReader getXMLReader() throws TikaException {
+        XMLReader reader;
         try {
             reader = getSAXParser().getXMLReader();
         } catch (SAXException e) {
@@ -142,24 +85,24 @@ public class ParseContext implements Serializable {
      * Returns the SAX parser specified in this parsing context. If a parser
      * is not explicitly specified, then one is created using the specified
      * or the default SAX parser factory.
+     * <p>
+     * Make sure to wrap your handler in the {@link org.apache.tika.sax.OfflineContentHandler} to
+     * prevent XML External Entity attacks
+     * </p>
+
      *
      * @see #getSAXParserFactory()
      * @since Apache Tika 0.8
      * @return SAX parser
      * @throws TikaException if a SAX parser could not be created
      */
-    public SAXParser getSAXParser() throws TikaException {
-        SAXParser parser = get(SAXParser.class);
-        if (parser != null) {
-            return parser;
-        } else {
-            try {
-                return getSAXParserFactory().newSAXParser();
-            } catch (ParserConfigurationException e) {
-                throw new TikaException("Unable to configure a SAX parser", e);
-            } catch (SAXException e) {
-                throw new TikaException("Unable to create a SAX parser", e);
-            }
+    public static SAXParser getSAXParser() throws TikaException {
+        try {
+            return getSAXParserFactory().newSAXParser();
+        } catch (ParserConfigurationException e) {
+            throw new TikaException("Unable to configure a SAX parser", e);
+        } catch (SAXException e) {
+            throw new TikaException("Unable to create a SAX parser", e);
         }
     }
 
@@ -169,29 +112,31 @@ public class ParseContext implements Serializable {
      * instance is created and returned. The default factory instance is
      * configured to be namespace-aware, not validating, and to use
      * {@link XMLConstants#FEATURE_SECURE_PROCESSING secure XML processing}.
+     * <p>
+     * Make sure to wrap your handler in the {@link org.apache.tika.sax.OfflineContentHandler} to
+     * prevent XML External Entity attacks
+     * </p>
      *
      * @since Apache Tika 0.8
      * @return SAX parser factory
      */
-    public SAXParserFactory getSAXParserFactory() {
-        SAXParserFactory factory = get(SAXParserFactory.class);
-        if (factory == null) {
-            factory = SAXParserFactory.newInstance();
-            factory.setNamespaceAware(true);
-            factory.setValidating(false);
-            try {
-                factory.setFeature(
-                        XMLConstants.FEATURE_SECURE_PROCESSING, true);
-            } catch (ParserConfigurationException e) {
-            } catch (SAXNotSupportedException e) {
-            } catch (SAXNotRecognizedException e) {
-                // TIKA-271: Some XML parsers do not support the
-                // secure-processing feature, even though it's required by
-                // JAXP in Java 5. Ignoring the exception is fine here, as
-                // deployments without this feature are inherently vulnerable
-                // to XML denial-of-service attacks.
-            }
+    public static SAXParserFactory getSAXParserFactory() {
+        SAXParserFactory factory = SAXParserFactory.newInstance();
+        factory.setNamespaceAware(true);
+        factory.setValidating(false);
+        try {
+            factory.setFeature(
+                    XMLConstants.FEATURE_SECURE_PROCESSING, true);
+        } catch (ParserConfigurationException e) {
+        } catch (SAXNotSupportedException e) {
+        } catch (SAXNotRecognizedException e) {
+            // TIKA-271: Some XML parsers do not support the
+            // secure-processing feature, even though it's required by
+            // JAXP in Java 5. Ignoring the exception is fine here, as
+            // deployments without this feature are inherently vulnerable
+            // to XML denial-of-service attacks.
         }
+
         return factory;
     }
 
@@ -205,17 +150,13 @@ public class ParseContext implements Serializable {
      * @since Apache Tika 1.13
      * @return DOM parser factory
      */
-    private DocumentBuilderFactory getDocumentBuilderFactory() {
+    public static DocumentBuilderFactory getDocumentBuilderFactory() {
         //borrowed from Apache POI
-        DocumentBuilderFactory documentBuilderFactory = get(DocumentBuilderFactory.class);
-        if (documentBuilderFactory != null) {
-            return documentBuilderFactory;
-        }
-        documentBuilderFactory = DocumentBuilderFactory.newInstance();
+        DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
         documentBuilderFactory.setNamespaceAware(true);
         documentBuilderFactory.setValidating(false);
         tryToSetSAXFeatureOnDOMFactory(documentBuilderFactory,
-            XMLConstants.FEATURE_SECURE_PROCESSING, true);
+                XMLConstants.FEATURE_SECURE_PROCESSING, true);
         tryToSetXercesManager(documentBuilderFactory);
         return documentBuilderFactory;
     }
@@ -230,14 +171,10 @@ public class ParseContext implements Serializable {
      * @since Apache Tika 1.13
      * @return DOM Builder
      */
-    public DocumentBuilder getDocumentBuilder() throws TikaException {
-        DocumentBuilder documentBuilder = get(DocumentBuilder.class);
-        if (documentBuilder != null) {
-            return documentBuilder;
-        }
+    public static DocumentBuilder getDocumentBuilder() throws TikaException {
         try {
             DocumentBuilderFactory documentBuilderFactory = getDocumentBuilderFactory();
-            documentBuilder = documentBuilderFactory.newDocumentBuilder();
+            DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
             documentBuilder.setEntityResolver(IGNORING_SAX_ENTITY_RESOLVER);
             documentBuilder.setErrorHandler(null);
             return documentBuilder;
@@ -256,12 +193,8 @@ public class ParseContext implements Serializable {
      * @since Apache Tika 1.13
      * @return StAX input factory
      */
-    public XMLInputFactory getXMLInputFactory() {
-        XMLInputFactory factory = get(XMLInputFactory.class);
-        if (factory != null) {
-            return factory;
-        }
-        factory = XMLInputFactory.newFactory();
+    public static XMLInputFactory getXMLInputFactory() {
+        XMLInputFactory factory = XMLInputFactory.newFactory();
 
         tryToSetStaxProperty(factory, XMLInputFactory.IS_NAMESPACE_AWARE, true);
         tryToSetStaxProperty(factory, XMLInputFactory.IS_VALIDATING, false);
@@ -273,13 +206,13 @@ public class ParseContext implements Serializable {
     private static void tryToSetSAXFeatureOnDOMFactory(DocumentBuilderFactory dbf, String feature, boolean value) {
         try {
             dbf.setFeature(feature, value);
-        } catch (Exception|AbstractMethodError e) {
+        } catch (Exception | AbstractMethodError e) {
         }
     }
 
     private static void tryToSetXercesManager(DocumentBuilderFactory dbf) {
         // Try built-in JVM one first, standalone if not
-        for (String securityManagerClassName : new String[] {
+        for (String securityManagerClassName : new String[]{
                 "com.sun.org.apache.xerces.internal.util.SecurityManager",
                 "org.apache.xerces.util.SecurityManager"
         }) {
@@ -295,7 +228,7 @@ public class ParseContext implements Serializable {
         }
     }
 
-    private void tryToSetStaxProperty(XMLInputFactory factory, String key, boolean value) {
+    private static void tryToSetStaxProperty(XMLInputFactory factory, String key, boolean value) {
         try {
             factory.setProperty(key, value);
         } catch (IllegalArgumentException e) {
diff --git a/tika-eval/src/main/java/org/apache/tika/eval/io/XMLLogReader.java b/tika-eval/src/main/java/org/apache/tika/eval/io/XMLLogReader.java
index 753866b..87e4c40 100644
--- a/tika-eval/src/main/java/org/apache/tika/eval/io/XMLLogReader.java
+++ b/tika-eval/src/main/java/org/apache/tika/eval/io/XMLLogReader.java
@@ -28,6 +28,7 @@ import java.sql.SQLException;
 import org.apache.log4j.Level;
 import org.apache.tika.io.IOUtils;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.utils.XMLReaderUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -39,7 +40,7 @@ public class XMLLogReader {
 
     public void read(InputStream xmlLogFileIs, XMLLogMsgHandler handler) throws XMLStreamException {
         InputStream is = new LogXMLWrappingInputStream(xmlLogFileIs);
-        XMLInputFactory factory = new ParseContext().getXMLInputFactory();
+        XMLInputFactory factory = XMLReaderUtils.getXMLInputFactory();
         XMLStreamReader reader = factory.createXMLStreamReader(is);
 
         Level level = null;
diff --git a/tika-eval/src/main/java/org/apache/tika/eval/reports/ResultsReporter.java b/tika-eval/src/main/java/org/apache/tika/eval/reports/ResultsReporter.java
index a9e541a..db2a171 100644
--- a/tika-eval/src/main/java/org/apache/tika/eval/reports/ResultsReporter.java
+++ b/tika-eval/src/main/java/org/apache/tika/eval/reports/ResultsReporter.java
@@ -45,6 +45,7 @@ import org.apache.tika.eval.ExtractProfiler;
 import org.apache.tika.eval.db.H2Util;
 import org.apache.tika.eval.db.JDBCUtil;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.utils.XMLReaderUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.w3c.dom.Document;
@@ -104,7 +105,7 @@ public class ResultsReporter {
 
         ResultsReporter r = new ResultsReporter();
 
-        DocumentBuilder docBuilder = new ParseContext().getDocumentBuilder();
+        DocumentBuilder docBuilder = XMLReaderUtils.getDocumentBuilder();
         Document doc;
         try (InputStream is = Files.newInputStream(p)) {
             doc = docBuilder.parse(is);
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
index 64ddf73..d2ee0c6 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
@@ -59,6 +59,7 @@ import org.apache.tika.metadata.TIFF;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.image.xmp.JempboxExtractor;
+import org.apache.tika.utils.XMLReaderUtils;
 import org.w3c.dom.Document;
 import org.xml.sax.SAXException;
 
@@ -183,7 +184,7 @@ public class ImageMetadataExtractor {
         try (InputStream decoded =
                      new ByteArrayInputStream(xmpData)
         ) {
-            Document dom = new ParseContext().getDocumentBuilder().parse(decoded);
+            Document dom = XMLReaderUtils.getDocumentBuilder().parse(decoded);
             if (dom != null) {
                 xmp = new XMPMetadata(dom);
             }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java
index 6d5038a..e847615 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java
@@ -37,6 +37,7 @@ import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.metadata.XMPMM;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.utils.DateUtils;
+import org.apache.tika.utils.XMLReaderUtils;
 import org.w3c.dom.Document;
 import org.xml.sax.SAXException;
 
@@ -66,7 +67,7 @@ public class JempboxExtractor {
         try (InputStream decoded =
                              new ByteArrayInputStream(xmpraw.toByteArray())
         ) {
-            Document dom = new ParseContext().getDocumentBuilder().parse(decoded);
+            Document dom = XMLReaderUtils.getDocumentBuilder().parse(decoded);
             if (dom != null) {
                 xmp = new XMPMetadata(dom);
             }

-- 
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.