You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2017/09/15 17:24:40 UTC

[tika] 02/02: TIKA-2467 refactor creation/configuration of XML parsers/factories/readers to be static methods.

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit c0c2eafe46224e5c316f2dede395308930a5ec0d
Author: tballison <ta...@mitre.org>
AuthorDate: Fri Sep 15 13:24:27 2017 -0400

    TIKA-2467 refactor creation/configuration of XML parsers/factories/readers
    to be static methods.
---
 .../tika/batch/builders/BatchProcessBuilder.java   |   3 +-
 .../batch/builders/CommandLineParserBuilder.java   |   3 +-
 .../java/org/apache/tika/config/TikaConfig.java    |  17 +-
 .../apache/tika/config/TikaConfigSerializer.java   |   3 +-
 .../java/org/apache/tika/parser/ParseContext.java  | 105 ++---------
 .../external/ExternalParsersConfigReader.java      |   3 +-
 .../XMLReaderUtils.java}                           | 191 +++++++--------------
 .../java/org/apache/tika/eval/io/XMLLogReader.java |   3 +-
 .../apache/tika/eval/reports/ResultsReporter.java  |   3 +-
 .../tika/parser/image/ImageMetadataExtractor.java  |   3 +-
 .../tika/parser/image/xmp/JempboxExtractor.java    |   3 +-
 11 files changed, 95 insertions(+), 242 deletions(-)

diff --git a/tika-batch/src/main/java/org/apache/tika/batch/builders/BatchProcessBuilder.java b/tika-batch/src/main/java/org/apache/tika/batch/builders/BatchProcessBuilder.java
index df5e4bf..314ea76 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/builders/BatchProcessBuilder.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/builders/BatchProcessBuilder.java
@@ -36,6 +36,7 @@ import org.apache.tika.io.IOExceptionWithCause;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.util.ClassLoaderUtil;
 import org.apache.tika.util.XMLDOMUtil;
+import org.apache.tika.utils.XMLReaderUtils;
 import org.w3c.dom.Document;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
@@ -67,7 +68,7 @@ public class BatchProcessBuilder {
     public BatchProcess build(InputStream is, Map<String,String> runtimeAttributes) throws IOException {
         Document doc = null;
         try {
-            DocumentBuilder docBuilder = new ParseContext().getDocumentBuilder();
+            DocumentBuilder docBuilder = XMLReaderUtils.getDocumentBuilder();
             doc = docBuilder.parse(is);
         } catch (TikaException|SAXException e) {
             throw new IOExceptionWithCause(e);
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/builders/CommandLineParserBuilder.java b/tika-batch/src/main/java/org/apache/tika/batch/builders/CommandLineParserBuilder.java
index 66b55bf..e58f163 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/builders/CommandLineParserBuilder.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/builders/CommandLineParserBuilder.java
@@ -27,6 +27,7 @@ import org.apache.commons.cli.Options;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.IOExceptionWithCause;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.utils.XMLReaderUtils;
 import org.w3c.dom.Document;
 import org.w3c.dom.NamedNodeMap;
 import org.w3c.dom.Node;
@@ -43,7 +44,7 @@ public class CommandLineParserBuilder {
     public Options build(InputStream is) throws IOException {
         Document doc = null;
         try {
-            DocumentBuilder docBuilder = new ParseContext().getDocumentBuilder();
+            DocumentBuilder docBuilder = XMLReaderUtils.getDocumentBuilder();
             doc = docBuilder.parse(is);
         } catch (TikaException|SAXException e) {
             throw new IOExceptionWithCause(e);
diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
index 8ca0d6b..28ac235 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
@@ -65,6 +65,7 @@ import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.ParserDecorator;
 import org.apache.tika.utils.AnnotationUtils;
+import org.apache.tika.utils.XMLReaderUtils;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
@@ -127,7 +128,7 @@ public class TikaConfig {
     }
     public TikaConfig(Path path, ServiceLoader loader)
             throws TikaException, IOException, SAXException {
-        this(getBuilder().parse(path.toFile()), loader);
+        this(XMLReaderUtils.getDocumentBuilder().parse(path.toFile()), loader);
     }
 
     public TikaConfig(File file)
@@ -136,7 +137,7 @@ public class TikaConfig {
     }
     public TikaConfig(File file, ServiceLoader loader)
             throws TikaException, IOException, SAXException {
-        this(getBuilder().parse(file), loader);
+        this(XMLReaderUtils.getDocumentBuilder().parse(file), loader);
     }
 
     public TikaConfig(URL url)
@@ -145,16 +146,16 @@ public class TikaConfig {
     }
     public TikaConfig(URL url, ClassLoader loader)
             throws TikaException, IOException, SAXException {
-        this(getBuilder().parse(url.toString()).getDocumentElement(), loader);
+        this(XMLReaderUtils.getDocumentBuilder().parse(url.toString()).getDocumentElement(), loader);
     }
     public TikaConfig(URL url, ServiceLoader loader)
             throws TikaException, IOException, SAXException {
-        this(getBuilder().parse(url.toString()).getDocumentElement(), loader);
+        this(XMLReaderUtils.getDocumentBuilder().parse(url.toString()).getDocumentElement(), loader);
     }
 
     public TikaConfig(InputStream stream)
             throws TikaException, IOException, SAXException {
-        this(getBuilder().parse(stream));
+        this(XMLReaderUtils.getDocumentBuilder().parse(stream));
     }
 
     public TikaConfig(Document document) throws TikaException, IOException {
@@ -250,7 +251,7 @@ public class TikaConfig {
         } else {
             ServiceLoader tmpServiceLoader = new ServiceLoader();
             try (InputStream stream = getConfigInputStream(config, tmpServiceLoader)) {
-                Element element = getBuilder().parse(stream).getDocumentElement();
+                Element element = XMLReaderUtils.getDocumentBuilder().parse(stream).getDocumentElement();
                 serviceLoader = serviceLoaderFromDomElement(element, tmpServiceLoader.getLoader());
                 DetectorXmlLoader detectorLoader = new DetectorXmlLoader();
                 EncodingDetectorXmlLoader encodingDetectorLoader = new EncodingDetectorXmlLoader();
@@ -390,10 +391,6 @@ public class TikaConfig {
         }
     }
 
-    private static DocumentBuilder getBuilder() throws TikaException {
-        return new ParseContext().getDocumentBuilder();
-    }
-
     private static Element getChild(Element element, String name) {
         Node child = element.getFirstChild();
         while (child != null) {
diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
index 3a2249e..c67b03b 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
@@ -45,6 +45,7 @@ import org.apache.tika.parser.DefaultParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.ParserDecorator;
+import org.apache.tika.utils.XMLReaderUtils;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
@@ -74,7 +75,7 @@ public class TikaConfigSerializer {
      */
     public static void serialize(TikaConfig config, Mode mode, Writer writer, Charset charset)
             throws Exception {
-        DocumentBuilder docBuilder = new ParseContext().getDocumentBuilder();
+        DocumentBuilder docBuilder = XMLReaderUtils.getDocumentBuilder();
 
         // root elements
         Document doc = docBuilder.newDocument();
diff --git a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
index 2521cc9..332efcd 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
@@ -23,19 +23,12 @@ import javax.xml.parsers.ParserConfigurationException;
 import javax.xml.parsers.SAXParser;
 import javax.xml.parsers.SAXParserFactory;
 import javax.xml.stream.XMLInputFactory;
-import javax.xml.stream.XMLResolver;
-import javax.xml.stream.XMLStreamException;
-import java.io.IOException;
 import java.io.Serializable;
-import java.io.StringReader;
-import java.lang.reflect.Method;
 import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.tika.exception.TikaException;
-import org.xml.sax.EntityResolver;
-import org.xml.sax.InputSource;
-import org.xml.sax.SAXException;
+import org.apache.tika.utils.XMLReaderUtils;
 import org.xml.sax.SAXNotRecognizedException;
 import org.xml.sax.SAXNotSupportedException;
 import org.xml.sax.XMLReader;
@@ -54,21 +47,6 @@ public class ParseContext implements Serializable {
     /** Map of objects in this context */
     private final Map<String, Object> context = new HashMap<String, Object>();
 
-    private static final EntityResolver IGNORING_SAX_ENTITY_RESOLVER = new EntityResolver() {
-        public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
-            return new InputSource(new StringReader(""));
-        }
-    };
-
-    private static final XMLResolver IGNORING_STAX_ENTITY_RESOLVER =
-            new XMLResolver() {
-                @Override
-                public Object resolveEntity(String publicID, String systemID, String baseURI, String namespace) throws
-                        XMLStreamException {
-                    return "";
-                }
-            };
-
     /**
      * Adds the given value to the context as an implementation of the given
      * interface.
@@ -129,13 +107,7 @@ public class ParseContext implements Serializable {
         if (reader != null) {
             return reader;
         }
-        try {
-            reader = getSAXParser().getXMLReader();
-        } catch (SAXException e) {
-            throw new TikaException("Unable to create an XMLReader", e);
-        }
-        reader.setEntityResolver(IGNORING_SAX_ENTITY_RESOLVER);
-        return reader;
+        return XMLReaderUtils.getXMLReader();
     }
 
     /**
@@ -153,13 +125,7 @@ public class ParseContext implements Serializable {
         if (parser != null) {
             return parser;
         } else {
-            try {
-                return getSAXParserFactory().newSAXParser();
-            } catch (ParserConfigurationException e) {
-                throw new TikaException("Unable to configure a SAX parser", e);
-            } catch (SAXException e) {
-                throw new TikaException("Unable to create a SAX parser", e);
-            }
+            return XMLReaderUtils.getSAXParser();
         }
     }
 
@@ -210,21 +176,16 @@ public class ParseContext implements Serializable {
         DocumentBuilderFactory documentBuilderFactory = get(DocumentBuilderFactory.class);
         if (documentBuilderFactory != null) {
             return documentBuilderFactory;
+        } else {
+            return XMLReaderUtils.getDocumentBuilderFactory();
         }
-        documentBuilderFactory = DocumentBuilderFactory.newInstance();
-        documentBuilderFactory.setNamespaceAware(true);
-        documentBuilderFactory.setValidating(false);
-        tryToSetSAXFeatureOnDOMFactory(documentBuilderFactory,
-            XMLConstants.FEATURE_SECURE_PROCESSING, true);
-        tryToSetXercesManager(documentBuilderFactory);
-        return documentBuilderFactory;
     }
 
     /**
      * Returns the DOM builder specified in this parsing context.
      * If a builder is not explicitly specified, then a builder
      * instance is created and returned. The builder instance is
-     * configured to apply an {@link #IGNORING_SAX_ENTITY_RESOLVER},
+     * configured by {@link XMLReaderUtils#getDocumentBuilder()} to resolve external entities to empty content,
      * and it sets the ErrorHandler to <code>null</code>.
      *
      * @since Apache Tika 1.13
@@ -234,15 +195,8 @@ public class ParseContext implements Serializable {
         DocumentBuilder documentBuilder = get(DocumentBuilder.class);
         if (documentBuilder != null) {
             return documentBuilder;
-        }
-        try {
-            DocumentBuilderFactory documentBuilderFactory = getDocumentBuilderFactory();
-            documentBuilder = documentBuilderFactory.newDocumentBuilder();
-            documentBuilder.setEntityResolver(IGNORING_SAX_ENTITY_RESOLVER);
-            documentBuilder.setErrorHandler(null);
-            return documentBuilder;
-        } catch (ParserConfigurationException e) {
-            throw new TikaException("XML parser not available", e);
+        } else {
+            return XMLReaderUtils.getDocumentBuilder();
         }
     }
 
@@ -251,7 +205,7 @@ public class ParseContext implements Serializable {
      * If a factory is not explicitly specified, then a default factory
      * instance is created and returned. The default factory instance is
      * configured to be namespace-aware and to apply reasonable security
-     * using the {@link #IGNORING_STAX_ENTITY_RESOLVER}.
+     * using the entity resolver installed by {@link XMLReaderUtils#getXMLInputFactory()}.
      *
      * @since Apache Tika 1.13
      * @return StAX input factory
@@ -261,46 +215,7 @@ public class ParseContext implements Serializable {
         if (factory != null) {
             return factory;
         }
-        factory = XMLInputFactory.newFactory();
-
-        tryToSetStaxProperty(factory, XMLInputFactory.IS_NAMESPACE_AWARE, true);
-        tryToSetStaxProperty(factory, XMLInputFactory.IS_VALIDATING, false);
-
-        factory.setXMLResolver(IGNORING_STAX_ENTITY_RESOLVER);
-        return factory;
-    }
-
-    private static void tryToSetSAXFeatureOnDOMFactory(DocumentBuilderFactory dbf, String feature, boolean value) {
-        try {
-            dbf.setFeature(feature, value);
-        } catch (Exception|AbstractMethodError e) {
-        }
-    }
-
-    private static void tryToSetXercesManager(DocumentBuilderFactory dbf) {
-        // Try built-in JVM one first, standalone if not
-        for (String securityManagerClassName : new String[] {
-                "com.sun.org.apache.xerces.internal.util.SecurityManager",
-                "org.apache.xerces.util.SecurityManager"
-        }) {
-            try {
-                Object mgr = Class.forName(securityManagerClassName).newInstance();
-                Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE);
-                setLimit.invoke(mgr, 4096);
-                dbf.setAttribute("http://apache.org/xml/properties/security-manager", mgr);
-                // Stop once one can be setup without error
-                return;
-            } catch (Throwable t) {
-            }
-        }
-    }
-
-    private void tryToSetStaxProperty(XMLInputFactory factory, String key, boolean value) {
-        try {
-            factory.setProperty(key, value);
-        } catch (IllegalArgumentException e) {
-            //swallow
-        }
+        return XMLReaderUtils.getXMLInputFactory();
     }
 
 }
diff --git a/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParsersConfigReader.java b/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParsersConfigReader.java
index 2fd3cb5..1df02e2 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParsersConfigReader.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParsersConfigReader.java
@@ -32,6 +32,7 @@ import org.apache.tika.exception.TikaException;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.mime.MimeTypeException;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.utils.XMLReaderUtils;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
@@ -54,7 +55,7 @@ public final class ExternalParsersConfigReader implements ExternalParsersConfigR
    
    public static List<ExternalParser> read(InputStream stream) throws TikaException, IOException {
       try {
-          DocumentBuilder builder = new ParseContext().getDocumentBuilder();
+          DocumentBuilder builder = XMLReaderUtils.getDocumentBuilder();
           Document document = builder.parse(new InputSource(stream));
           return read(document);
       } catch (SAXException e) {
diff --git a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java b/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
similarity index 58%
copy from tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
copy to tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
index 2521cc9..6f24708 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
+++ b/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
@@ -1,20 +1,29 @@
-/*
+/**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.tika.parser;
+
+package org.apache.tika.utils;
+
+import org.apache.tika.exception.TikaException;
+import org.xml.sax.EntityResolver;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXNotRecognizedException;
+import org.xml.sax.SAXNotSupportedException;
+import org.xml.sax.XMLReader;
 
 import javax.xml.XMLConstants;
 import javax.xml.parsers.DocumentBuilder;
@@ -26,33 +35,15 @@ import javax.xml.stream.XMLInputFactory;
 import javax.xml.stream.XMLResolver;
 import javax.xml.stream.XMLStreamException;
 import java.io.IOException;
-import java.io.Serializable;
 import java.io.StringReader;
 import java.lang.reflect.Method;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.tika.exception.TikaException;
-import org.xml.sax.EntityResolver;
-import org.xml.sax.InputSource;
-import org.xml.sax.SAXException;
-import org.xml.sax.SAXNotRecognizedException;
-import org.xml.sax.SAXNotSupportedException;
-import org.xml.sax.XMLReader;
 
 /**
- * Parse context. Used to pass context information to Tika parsers.
- *
- * @since Apache Tika 0.5
- * @see <a href="https://issues.apache.org/jira/browse/TIKA-275">TIKA-275</a>
+ * Utility functions for reading XML.  If you are doing SAX parsing, make sure
+ * to use the {@link org.apache.tika.sax.OfflineContentHandler} to guard against
+ * XML External Entity attacks.
  */
-public class ParseContext implements Serializable {
-
-    /** Serial version UID. */
-    private static final long serialVersionUID = -5921436862145826534L;
-
-    /** Map of objects in this context */
-    private final Map<String, Object> context = new HashMap<String, Object>();
+public class XMLReaderUtils {
 
     private static final EntityResolver IGNORING_SAX_ENTITY_RESOLVER = new EntityResolver() {
         public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
@@ -70,51 +61,6 @@ public class ParseContext implements Serializable {
             };
 
     /**
-     * Adds the given value to the context as an implementation of the given
-     * interface.
-     *
-     * @param key the interface implemented by the given value
-     * @param value the value to be added, or <code>null</code> to remove
-     */
-    public <T> void set(Class<T> key, T value) {
-        if (value != null) {
-            context.put(key.getName(), value);
-        } else {
-            context.remove(key.getName());
-        }
-    }
-
-    /**
-     * Returns the object in this context that implements the given interface.
-     *
-     * @param key the interface implemented by the requested object
-     * @return the object that implements the given interface,
-     *         or <code>null</code> if not found
-     */
-    @SuppressWarnings("unchecked")
-    public <T> T get(Class<T> key) {
-        return (T) context.get(key.getName());
-    }
-
-    /**
-     * Returns the object in this context that implements the given interface,
-     * or the given default value if such an object is not found.
-     *
-     * @param key the interface implemented by the requested object
-     * @param defaultValue value to return if the requested object is not found
-     * @return the object that implements the given interface,
-     *         or the given default value if not found
-     */
-    public <T> T get(Class<T> key, T defaultValue) {
-        T value = get(key);
-        if (value != null) {
-            return value;
-        } else {
-            return defaultValue;
-        }
-    }
-
-    /**
      * Returns the XMLReader specified in this parsing context. If a reader
      * is not explicitly specified, then one is created using the specified
      * or the default SAX parser.
@@ -124,11 +70,8 @@ public class ParseContext implements Serializable {
      * @return XMLReader
      * @throws TikaException
      */
-    public XMLReader getXMLReader() throws TikaException {
-        XMLReader reader = get(XMLReader.class);
-        if (reader != null) {
-            return reader;
-        }
+    public static XMLReader getXMLReader() throws TikaException {
+        XMLReader reader;
         try {
             reader = getSAXParser().getXMLReader();
         } catch (SAXException e) {
@@ -142,24 +85,24 @@ public class ParseContext implements Serializable {
      * Returns the SAX parser specified in this parsing context. If a parser
      * is not explicitly specified, then one is created using the specified
      * or the default SAX parser factory.
+     * <p>
+     * Make sure to wrap your handler in the {@link org.apache.tika.sax.OfflineContentHandler} to
+     * prevent XML External Entity attacks.
+     * </p>
+
      *
      * @see #getSAXParserFactory()
      * @since Apache Tika 0.8
      * @return SAX parser
      * @throws TikaException if a SAX parser could not be created
      */
-    public SAXParser getSAXParser() throws TikaException {
-        SAXParser parser = get(SAXParser.class);
-        if (parser != null) {
-            return parser;
-        } else {
-            try {
-                return getSAXParserFactory().newSAXParser();
-            } catch (ParserConfigurationException e) {
-                throw new TikaException("Unable to configure a SAX parser", e);
-            } catch (SAXException e) {
-                throw new TikaException("Unable to create a SAX parser", e);
-            }
+    public static SAXParser getSAXParser() throws TikaException {
+        try {
+            return getSAXParserFactory().newSAXParser();
+        } catch (ParserConfigurationException e) {
+            throw new TikaException("Unable to configure a SAX parser", e);
+        } catch (SAXException e) {
+            throw new TikaException("Unable to create a SAX parser", e);
         }
     }
 
@@ -169,29 +112,31 @@ public class ParseContext implements Serializable {
      * instance is created and returned. The default factory instance is
      * configured to be namespace-aware, not validating, and to use
      * {@link XMLConstants#FEATURE_SECURE_PROCESSING secure XML processing}.
+     * <p>
+     * Make sure to wrap your handler in the {@link org.apache.tika.sax.OfflineContentHandler} to
+     * prevent XML External Entity attacks.
+     * </p>
      *
      * @since Apache Tika 0.8
      * @return SAX parser factory
      */
-    public SAXParserFactory getSAXParserFactory() {
-        SAXParserFactory factory = get(SAXParserFactory.class);
-        if (factory == null) {
-            factory = SAXParserFactory.newInstance();
-            factory.setNamespaceAware(true);
-            factory.setValidating(false);
-            try {
-                factory.setFeature(
-                        XMLConstants.FEATURE_SECURE_PROCESSING, true);
-            } catch (ParserConfigurationException e) {
-            } catch (SAXNotSupportedException e) {
-            } catch (SAXNotRecognizedException e) {
-                // TIKA-271: Some XML parsers do not support the
-                // secure-processing feature, even though it's required by
-                // JAXP in Java 5. Ignoring the exception is fine here, as
-                // deployments without this feature are inherently vulnerable
-                // to XML denial-of-service attacks.
-            }
+    public static SAXParserFactory getSAXParserFactory() {
+        SAXParserFactory factory = SAXParserFactory.newInstance();
+        factory.setNamespaceAware(true);
+        factory.setValidating(false);
+        try {
+            factory.setFeature(
+                    XMLConstants.FEATURE_SECURE_PROCESSING, true);
+        } catch (ParserConfigurationException e) {
+        } catch (SAXNotSupportedException e) {
+        } catch (SAXNotRecognizedException e) {
+            // TIKA-271: Some XML parsers do not support the
+            // secure-processing feature, even though it's required by
+            // JAXP in Java 5. Ignoring the exception is fine here, as
+            // deployments without this feature are inherently vulnerable
+            // to XML denial-of-service attacks.
         }
+
         return factory;
     }
 
@@ -205,17 +150,13 @@ public class ParseContext implements Serializable {
      * @since Apache Tika 1.13
      * @return DOM parser factory
      */
-    private DocumentBuilderFactory getDocumentBuilderFactory() {
+    public static DocumentBuilderFactory getDocumentBuilderFactory() {
         //borrowed from Apache POI
-        DocumentBuilderFactory documentBuilderFactory = get(DocumentBuilderFactory.class);
-        if (documentBuilderFactory != null) {
-            return documentBuilderFactory;
-        }
-        documentBuilderFactory = DocumentBuilderFactory.newInstance();
+        DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
         documentBuilderFactory.setNamespaceAware(true);
         documentBuilderFactory.setValidating(false);
         tryToSetSAXFeatureOnDOMFactory(documentBuilderFactory,
-            XMLConstants.FEATURE_SECURE_PROCESSING, true);
+                XMLConstants.FEATURE_SECURE_PROCESSING, true);
         tryToSetXercesManager(documentBuilderFactory);
         return documentBuilderFactory;
     }
@@ -230,14 +171,10 @@ public class ParseContext implements Serializable {
      * @since Apache Tika 1.13
      * @return DOM Builder
      */
-    public DocumentBuilder getDocumentBuilder() throws TikaException {
-        DocumentBuilder documentBuilder = get(DocumentBuilder.class);
-        if (documentBuilder != null) {
-            return documentBuilder;
-        }
+    public static DocumentBuilder getDocumentBuilder() throws TikaException {
         try {
             DocumentBuilderFactory documentBuilderFactory = getDocumentBuilderFactory();
-            documentBuilder = documentBuilderFactory.newDocumentBuilder();
+            DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
             documentBuilder.setEntityResolver(IGNORING_SAX_ENTITY_RESOLVER);
             documentBuilder.setErrorHandler(null);
             return documentBuilder;
@@ -256,12 +193,8 @@ public class ParseContext implements Serializable {
      * @since Apache Tika 1.13
      * @return StAX input factory
      */
-    public XMLInputFactory getXMLInputFactory() {
-        XMLInputFactory factory = get(XMLInputFactory.class);
-        if (factory != null) {
-            return factory;
-        }
-        factory = XMLInputFactory.newFactory();
+    public static XMLInputFactory getXMLInputFactory() {
+        XMLInputFactory factory = XMLInputFactory.newFactory();
 
         tryToSetStaxProperty(factory, XMLInputFactory.IS_NAMESPACE_AWARE, true);
         tryToSetStaxProperty(factory, XMLInputFactory.IS_VALIDATING, false);
@@ -273,13 +206,13 @@ public class ParseContext implements Serializable {
     private static void tryToSetSAXFeatureOnDOMFactory(DocumentBuilderFactory dbf, String feature, boolean value) {
         try {
             dbf.setFeature(feature, value);
-        } catch (Exception|AbstractMethodError e) {
+        } catch (Exception | AbstractMethodError e) {
         }
     }
 
     private static void tryToSetXercesManager(DocumentBuilderFactory dbf) {
         // Try built-in JVM one first, standalone if not
-        for (String securityManagerClassName : new String[] {
+        for (String securityManagerClassName : new String[]{
                 "com.sun.org.apache.xerces.internal.util.SecurityManager",
                 "org.apache.xerces.util.SecurityManager"
         }) {
@@ -295,7 +228,7 @@ public class ParseContext implements Serializable {
         }
     }
 
-    private void tryToSetStaxProperty(XMLInputFactory factory, String key, boolean value) {
+    private static void tryToSetStaxProperty(XMLInputFactory factory, String key, boolean value) {
         try {
             factory.setProperty(key, value);
         } catch (IllegalArgumentException e) {
diff --git a/tika-eval/src/main/java/org/apache/tika/eval/io/XMLLogReader.java b/tika-eval/src/main/java/org/apache/tika/eval/io/XMLLogReader.java
index 753866b..87e4c40 100644
--- a/tika-eval/src/main/java/org/apache/tika/eval/io/XMLLogReader.java
+++ b/tika-eval/src/main/java/org/apache/tika/eval/io/XMLLogReader.java
@@ -28,6 +28,7 @@ import java.sql.SQLException;
 import org.apache.log4j.Level;
 import org.apache.tika.io.IOUtils;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.utils.XMLReaderUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -39,7 +40,7 @@ public class XMLLogReader {
 
     public void read(InputStream xmlLogFileIs, XMLLogMsgHandler handler) throws XMLStreamException {
         InputStream is = new LogXMLWrappingInputStream(xmlLogFileIs);
-        XMLInputFactory factory = new ParseContext().getXMLInputFactory();
+        XMLInputFactory factory = XMLReaderUtils.getXMLInputFactory();
         XMLStreamReader reader = factory.createXMLStreamReader(is);
 
         Level level = null;
diff --git a/tika-eval/src/main/java/org/apache/tika/eval/reports/ResultsReporter.java b/tika-eval/src/main/java/org/apache/tika/eval/reports/ResultsReporter.java
index a9e541a..db2a171 100644
--- a/tika-eval/src/main/java/org/apache/tika/eval/reports/ResultsReporter.java
+++ b/tika-eval/src/main/java/org/apache/tika/eval/reports/ResultsReporter.java
@@ -45,6 +45,7 @@ import org.apache.tika.eval.ExtractProfiler;
 import org.apache.tika.eval.db.H2Util;
 import org.apache.tika.eval.db.JDBCUtil;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.utils.XMLReaderUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.w3c.dom.Document;
@@ -104,7 +105,7 @@ public class ResultsReporter {
 
         ResultsReporter r = new ResultsReporter();
 
-        DocumentBuilder docBuilder = new ParseContext().getDocumentBuilder();
+        DocumentBuilder docBuilder = XMLReaderUtils.getDocumentBuilder();
         Document doc;
         try (InputStream is = Files.newInputStream(p)) {
             doc = docBuilder.parse(is);
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
index 64ddf73..d2ee0c6 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
@@ -59,6 +59,7 @@ import org.apache.tika.metadata.TIFF;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.image.xmp.JempboxExtractor;
+import org.apache.tika.utils.XMLReaderUtils;
 import org.w3c.dom.Document;
 import org.xml.sax.SAXException;
 
@@ -183,7 +184,7 @@ public class ImageMetadataExtractor {
         try (InputStream decoded =
                      new ByteArrayInputStream(xmpData)
         ) {
-            Document dom = new ParseContext().getDocumentBuilder().parse(decoded);
+            Document dom = XMLReaderUtils.getDocumentBuilder().parse(decoded);
             if (dom != null) {
                 xmp = new XMPMetadata(dom);
             }
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java
index 6d5038a..e847615 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java
@@ -37,6 +37,7 @@ import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.metadata.XMPMM;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.utils.DateUtils;
+import org.apache.tika.utils.XMLReaderUtils;
 import org.w3c.dom.Document;
 import org.xml.sax.SAXException;
 
@@ -66,7 +67,7 @@ public class JempboxExtractor {
         try (InputStream decoded =
                              new ByteArrayInputStream(xmpraw.toByteArray())
         ) {
-            Document dom = new ParseContext().getDocumentBuilder().parse(decoded);
+            Document dom = XMLReaderUtils.getDocumentBuilder().parse(decoded);
             if (dom != null) {
                 xmp = new XMPMetadata(dom);
             }

-- 
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.