You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2019/01/03 20:09:42 UTC

[tika] branch master updated: TIKA-2802 -- try to clear the XMLReader's resources to avoid OOM

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/master by this push:
     new a068882  TIKA-2802 -- try to clear the XMLReader's resources to avoid OOM
a068882 is described below

commit a0688825b15b8d3f1672236b0f1f6536c8a863c4
Author: TALLISON <ta...@apache.org>
AuthorDate: Thu Jan 3 15:03:40 2019 -0500

    TIKA-2802 -- try to clear the XMLReader's resources to avoid OOM
---
 .../java/org/apache/tika/utils/XMLReaderUtils.java | 95 ++++++++++++++++++++--
 1 file changed, 86 insertions(+), 9 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java b/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
index ec5ad00..c50d797 100644
--- a/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
@@ -21,9 +21,13 @@ import org.apache.tika.exception.TikaException;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.sax.OfflineContentHandler;
 import org.w3c.dom.Document;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.DTDHandler;
 import org.xml.sax.EntityResolver;
+import org.xml.sax.ErrorHandler;
 import org.xml.sax.InputSource;
 import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
 import org.xml.sax.XMLReader;
 import org.xml.sax.helpers.DefaultHandler;
 
@@ -69,6 +73,40 @@ public class XMLReaderUtils implements Serializable {
     private static final Logger LOG = Logger.getLogger(XMLReaderUtils.class.getName());
 
     private static final String XERCES_SECURITY_MANAGER = "org.apache.xerces.util.SecurityManager";
+
+    private static final String XERCES_SECURITY_MANAGER_PROPERTY = "http://apache.org/xml/properties/security-manager";
+
+    private static final ContentHandler IGNORING_CONTENT_HANDLER = new DefaultHandler();
+
+    private static final DTDHandler IGNORING_DTD_HANDLER = new DTDHandler() {
+        @Override
+        public void notationDecl(String name, String publicId, String systemId) throws SAXException {
+
+        }
+
+        @Override
+        public void unparsedEntityDecl(String name, String publicId, String systemId, String notationName) throws SAXException {
+
+        }
+    };
+
+    private static final ErrorHandler IGNORING_ERROR_HANDLER = new ErrorHandler() {
+        @Override
+        public void warning(SAXParseException exception) throws SAXException {
+
+        }
+
+        @Override
+        public void error(SAXParseException exception) throws SAXException {
+
+        }
+
+        @Override
+        public void fatalError(SAXParseException exception) throws SAXException {
+
+        }
+    };
+
     /**
      * Default size for the pool of SAX Parsers
      * and the pool of DOM builders
@@ -566,12 +604,12 @@ public class XMLReaderUtils implements Serializable {
      * @param parser parser to return
      */
     private static void releaseParser(PoolSAXParser parser) {
+        parser.reset();
         //if this is a different generation, don't put it back
         //in the pool
         if (parser.getGeneration() != POOL_GENERATION.get()) {
             return;
         }
-        parser.reset();
         try {
             SAX_READ_WRITE_LOCK.readLock().lock();
             //if there are extra parsers (e.g. after a reset of the pool to a smaller size),
@@ -603,6 +641,10 @@ public class XMLReaderUtils implements Serializable {
             //parsers have already started.  We could have an NPE on SAX_PARSERS
             //if we didn't lock.
             SAX_READ_WRITE_LOCK.writeLock().lock();
+            //free up any resources before emptying SAX_PARSERS
+            for (PoolSAXParser parser : SAX_PARSERS) {
+                parser.reset();
+            }
             SAX_PARSERS.clear();
             SAX_PARSERS = new ArrayBlockingQueue<>(poolSize);
             int generation = POOL_GENERATION.incrementAndGet();
@@ -613,7 +655,6 @@ public class XMLReaderUtils implements Serializable {
                     throw new TikaException("problem creating sax parser", e);
                 }
             }
-
         } finally {
             SAX_READ_WRITE_LOCK.writeLock().unlock();
         }
@@ -636,13 +677,13 @@ public class XMLReaderUtils implements Serializable {
         // Try built-in JVM one first, standalone if not
         for (String securityManagerClassName : new String[] {
                 //"com.sun.org.apache.xerces.internal.util.SecurityManager",
-                "org.apache.xerces.util.SecurityManager"
+                XERCES_SECURITY_MANAGER
         }) {
             try {
                 Object mgr = Class.forName(securityManagerClassName).newInstance();
                 Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE);
                 setLimit.invoke(mgr, MAX_ENTITY_EXPANSIONS);
-                factory.setAttribute("http://apache.org/xml/properties/security-manager", mgr);
+                factory.setAttribute(XERCES_SECURITY_MANAGER_PROPERTY, mgr);
                 // Stop once one can be setup without error
                 return;
             } catch (ClassNotFoundException e) {
@@ -673,13 +714,14 @@ public class XMLReaderUtils implements Serializable {
         // Try built-in JVM one first, standalone if not
         for (String securityManagerClassName : new String[] {
                 //"com.sun.org.apache.xerces.internal.util.SecurityManager",
-                "org.apache.xerces.util.SecurityManager"
+                XERCES_SECURITY_MANAGER
         }) {
             try {
                 Object mgr = Class.forName(securityManagerClassName).newInstance();
                 Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE);
                 setLimit.invoke(mgr, MAX_ENTITY_EXPANSIONS);
-                parser.setProperty("http://apache.org/xml/properties/security-manager", mgr);
+
+                parser.setProperty(XERCES_SECURITY_MANAGER_PROPERTY, mgr);
                 // Stop once one can be setup without error
                 return;
             } catch (ClassNotFoundException e) {
@@ -786,7 +828,7 @@ public class XMLReaderUtils implements Serializable {
             Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE);
             setLimit.invoke(mgr, MAX_ENTITY_EXPANSIONS);
 
-            parser.setProperty("http://apache.org/xml/properties/security-manager", mgr);
+            parser.setProperty(XERCES_SECURITY_MANAGER_PROPERTY, mgr);
             hasSecurityManager = true;
         } catch (SecurityException e) {
             //don't swallow security exceptions
@@ -838,6 +880,12 @@ public class XMLReaderUtils implements Serializable {
         @Override
         public void reset() {
             //don't do anything
+            try {
+                XMLReader reader = saxParser.getXMLReader();
+                clearReader(reader);
+            } catch (SAXException e) {
+
+            }
         }
     }
 
@@ -849,10 +897,17 @@ public class XMLReaderUtils implements Serializable {
         @Override
         void reset() {
             try {
-                Object object = saxParser.getProperty(XERCES_SECURITY_MANAGER);
+                Object object = saxParser.getProperty(XERCES_SECURITY_MANAGER_PROPERTY);
                 saxParser.reset();
-                saxParser.setProperty(XERCES_SECURITY_MANAGER, object);
+                saxParser.setProperty(XERCES_SECURITY_MANAGER_PROPERTY, object);
+            } catch (SAXException e) {
+                LOG.log(Level.WARNING, "problem resetting sax parser", e);
+            }
+            try {
+                XMLReader reader = saxParser.getXMLReader();
+                clearReader(reader);
             } catch (SAXException e) {
+
             }
         }
     }
@@ -865,6 +920,12 @@ public class XMLReaderUtils implements Serializable {
         @Override
         void reset() {
             saxParser.reset();
+            try {
+                XMLReader reader = saxParser.getXMLReader();
+                clearReader(reader);
+            } catch (SAXException e) {
+
+            }
         }
     }
 
@@ -882,7 +943,23 @@ public class XMLReaderUtils implements Serializable {
             } catch (UnsupportedOperationException e) {
 
             }
+            try {
+                XMLReader reader = saxParser.getXMLReader();
+                clearReader(reader);
+            } catch (SAXException e) {
+
+            }
             trySetXercesSecurityManager(saxParser);
         }
     }
+
+    private static void clearReader(XMLReader reader) {
+        if (reader == null) {
+            return;
+        }
+        reader.setContentHandler(IGNORING_CONTENT_HANDLER);
+        reader.setDTDHandler(IGNORING_DTD_HANDLER);
+        reader.setEntityResolver(IGNORING_SAX_ENTITY_RESOLVER);
+        reader.setErrorHandler(IGNORING_ERROR_HANDLER);
+    }
 }