You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/08/03 17:50:47 UTC

[tika] branch branch_1x updated (4475b72 -> 36fa58f)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git.


    from 4475b72  TIKA-2673 -- fix forbidden-apis failure and retro-fit for branch_1x
     new f5a2fae  TIKA-2648 detect interpreted server-side script languages
     new bd9d75d  improve xml reading
     new 36fa58f  TIKA-2704

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../main/java/org/apache/tika/mime/MimeType.java   | 17 ++++
 .../main/java/org/apache/tika/mime/MimeTypes.java  | 16 ++--
 .../java/org/apache/tika/mime/MimeTypesReader.java |  3 +
 .../apache/tika/mime/MimeTypesReaderMetKeys.java   |  2 +
 .../java/org/apache/tika/utils/XMLReaderUtils.java | 97 +++++++++++++++++++++-
 .../org/apache/tika/mime/tika-mimetypes.xml        | 12 +--
 .../org/apache/tika/mime/CustomReaderTest.java     |  2 +
 .../org/apache/tika/mime/MimeDetectionTest.java    | 22 +++++
 .../org/apache/tika/mime/custom-mimetypes2.xml     |  2 +-
 .../org/apache/tika/parser/mp3/MpegStream.java     | 34 +++-----
 10 files changed, 169 insertions(+), 38 deletions(-)


[tika] 01/03: TIKA-2648 detect interpreted server-side script languages

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit f5a2faefd17936e1ad2c9b6b8c9b0ea3d3c30d99
Author: Gérard Bouchar <gb...@protonmail.com>
AuthorDate: Fri Aug 3 13:10:53 2018 -0400

    TIKA-2648 detect interpreted server-side script languages
    
    mime detection based on resource name used to detect
    the mime-type of "http://example.com/test.php" as being "text/x-php"
    whereas given such a URL, the file extension doesn't give
    us any information about the mime type that will be returned
    by the server
---
 .../main/java/org/apache/tika/mime/MimeType.java   | 17 +++++++++++++++++
 .../main/java/org/apache/tika/mime/MimeTypes.java  | 16 +++++++++++-----
 .../java/org/apache/tika/mime/MimeTypesReader.java |  3 +++
 .../apache/tika/mime/MimeTypesReaderMetKeys.java   |  2 ++
 .../org/apache/tika/mime/tika-mimetypes.xml        | 12 ++++++------
 .../org/apache/tika/mime/CustomReaderTest.java     |  2 ++
 .../org/apache/tika/mime/MimeDetectionTest.java    | 22 ++++++++++++++++++++++
 .../org/apache/tika/mime/custom-mimetypes2.xml     |  2 +-
 8 files changed, 64 insertions(+), 12 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/mime/MimeType.java b/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
index b4d651e..d52c20b 100644
--- a/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
+++ b/tika-core/src/main/java/org/apache/tika/mime/MimeType.java
@@ -111,6 +111,12 @@ public final class MimeType implements Comparable<MimeType>, Serializable {
     private List<String> extensions = null;
 
     /**
+     * Whether this mime-type is used for server-side scripts,
+     * and thus cannot reliably be used for filename-based type detection
+     */
+    private boolean isInterpreted = false;
+
+    /**
      * Creates a media type with the give name and containing media type
      * registry. The name is expected to be valid and normalized to lower
      * case. This constructor should only be called by
@@ -303,6 +309,17 @@ public final class MimeType implements Comparable<MimeType>, Serializable {
     }
 
     /**
+     * whether the type is used as a server-side scripting technology
+     */
+    boolean isInterpreted() {
+        return isInterpreted;
+    }
+
+    void setInterpreted(boolean interpreted) {
+        isInterpreted = interpreted;
+    }
+
+    /**
      * Defines a RootXML description. RootXML is made of a localName and/or a
      * namespaceURI.
      */
diff --git a/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java b/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
index 705ad3d..38c2ecc 100644
--- a/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
+++ b/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
@@ -500,10 +500,13 @@ public final class MimeTypes implements Detector, Serializable {
         String resourceName = metadata.get(Metadata.RESOURCE_NAME_KEY);
         if (resourceName != null) {
             String name = null;
+            boolean isHttp = false;
 
             // Deal with a URI or a path name in as the resource  name
             try {
                 URI uri = new URI(resourceName);
+                String scheme = uri.getScheme();
+                isHttp = scheme != null && scheme.startsWith("http"); // http or https
                 String path = uri.getPath();
                 if (path != null) {
                     int slash = path.lastIndexOf('/');
@@ -517,11 +520,14 @@ public final class MimeTypes implements Detector, Serializable {
 
             if (name != null) {
                 MimeType hint = getMimeType(name);
-                
-                // If we have some types based on mime magic, try to specialise
-                //  and/or select the type based on that
-                // Otherwise, use the type identified from the name
-                possibleTypes = applyHint(possibleTypes, hint);
+
+                // For server-side scripting languages, we cannot rely on the filename to detect the mime type
+                if (!(isHttp && hint.isInterpreted())) {
+                    // If we have some types based on mime magic, try to specialise
+                    //  and/or select the type based on that
+                    // Otherwise, use the type identified from the name
+                    possibleTypes = applyHint(possibleTypes, hint);
+                }
             }
         }
 
diff --git a/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java b/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
index ad7bd80..cfc030f 100644
--- a/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
+++ b/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
@@ -169,8 +169,11 @@ public class MimeTypesReader extends DefaultHandler implements MimeTypesReaderMe
         if (type == null) {
             if (MIME_TYPE_TAG.equals(qName)) {
                 String name = attributes.getValue(MIME_TYPE_TYPE_ATTR);
+                String interpretedAttr = attributes.getValue(INTERPRETED_ATTR);
+                boolean interpreted = "true".equals(interpretedAttr);
                 try {
                     type = types.forName(name);
+                    type.setInterpreted(interpreted);
                 } catch (MimeTypeException e) {
                     handleMimeError(name, e, qName, attributes);
                 }
diff --git a/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReaderMetKeys.java b/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReaderMetKeys.java
index 98bfee5..c77cc5c 100644
--- a/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReaderMetKeys.java
+++ b/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReaderMetKeys.java
@@ -27,6 +27,8 @@ public interface MimeTypesReaderMetKeys {
 
     String MIME_TYPE_TYPE_ATTR = "type";
 
+    String INTERPRETED_ATTR = "interpreted";
+
     String ACRONYM_TAG = "acronym";
 
     String COMMENT_TAG = "_comment";
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 3c4b4ca..61a1634 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -5932,13 +5932,13 @@
     <sub-class-of type="text/plain"/>
   </mime-type>
 
-  <mime-type type="text/asp">
+  <mime-type type="text/asp" interpreted="true">
     <_comment>Active Server Page</_comment>
     <glob pattern="*.asp"/>
     <sub-class-of type="text/plain"/>
   </mime-type>
 
-  <mime-type type="text/aspdotnet">
+  <mime-type type="text/aspdotnet" interpreted="true">
     <_comment>ASP .NET</_comment>
     <glob pattern="*.aspx"/>
     <sub-class-of type="text/plain"/>
@@ -6327,7 +6327,7 @@
     <sub-class-of type="text/plain"/>
   </mime-type>
 
-  <mime-type type="text/x-cgi">
+  <mime-type type="text/x-cgi" interpreted="true">
     <_comment>CGI script</_comment>
     <glob pattern="*.cgi"/>
     <sub-class-of type="text/plain"/>
@@ -6381,7 +6381,7 @@
     <sub-class-of type="text/plain"/>
   </mime-type>
 
-  <mime-type type="text/x-coldfusion">
+  <mime-type type="text/x-coldfusion" interpreted="true">
     <_comment>ColdFusion source code</_comment>
     <glob pattern="*.cfm"/>
     <glob pattern="*.cfml"/>
@@ -6497,7 +6497,7 @@
     <sub-class-of type="text/plain"/>
   </mime-type>
 
-  <mime-type type="text/x-jsp">
+  <mime-type type="text/x-jsp" interpreted="true">
     <_comment>Java Server Page</_comment>
     <alias type="application/x-httpd-jsp"/>
     <sub-class-of type="text/plain"/>
@@ -6620,7 +6620,7 @@
     <sub-class-of type="text/plain"/>
   </mime-type>
 
-  <mime-type type="text/x-php">
+  <mime-type type="text/x-php" interpreted="true">
     <_comment>PHP script</_comment>
     <magic priority="50">
       <match value="&lt;?php" type="string" offset="0"/>
diff --git a/tika-core/src/test/java/org/apache/tika/mime/CustomReaderTest.java b/tika-core/src/test/java/org/apache/tika/mime/CustomReaderTest.java
index 8928727..df51d45 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/CustomReaderTest.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/CustomReaderTest.java
@@ -23,6 +23,7 @@ import java.util.Map;
 
 import org.junit.Test;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
 import org.xml.sax.Attributes;
 import org.xml.sax.SAXException;
@@ -89,6 +90,7 @@ public class CustomReaderTest {
     assertEquals(1, reader.ignorePatterns.size());
     assertEquals(another.toString()+">>*"+hello.getExtension(), 
         reader.ignorePatterns.get(0));
+    assertTrue("Server-side script type not detected", another.isInterpreted());
     
     //System.out.println( mimeTypes.getMediaTypeRegistry().getTypes() );
   }
diff --git a/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java b/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
index 6b16360..43eebd2 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
@@ -83,6 +83,21 @@ public class MimeDetectionTest {
     }
 
     @Test
+    public void testDetectionWithoutContent() throws IOException {
+        testUrlWithoutContent("text/html", "test.html");
+        testUrlWithoutContent("text/html", "http://test.com/test.html");
+        testUrlWithoutContent("text/plain", "http://test.com/test.txt");
+
+        // In case the url contains a filename referencing a server-side scripting language,
+        // it gives us no clue concerning the actual mime type of the response
+        testUrlWithoutContent("application/octet-stream", "http://test.com/test.php");
+        testUrlWithoutContent("application/octet-stream", "http://test.com/test.cgi");
+        testUrlWithoutContent("application/octet-stream", "http://test.com/test.jsp");
+        // But in case the protocol is not http or https, the script is probably not interpreted
+        testUrlWithoutContent("text/x-php", "ftp://test.com/test.php");
+    }
+
+    @Test
     public void testByteOrderMark() throws Exception {
         assertEquals(MediaType.TEXT_PLAIN, mimeTypes.detect(
                 new ByteArrayInputStream("\ufefftest".getBytes(UTF_16LE)),
@@ -136,6 +151,13 @@ public class MimeDetectionTest {
         testStream(expected, url, in);
     }
 
+    private void testUrlWithoutContent(String expected, String url) throws IOException {
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.RESOURCE_NAME_KEY, url);
+        String mime = this.mimeTypes.detect(null, metadata).toString();
+        assertEquals(url + " is not properly detected using only resource name", expected, mime);
+    }
+
     private void testUrl(String expected, String url, String file) throws IOException{
         InputStream in = getClass().getResourceAsStream(file);
         testStream(expected, url, in);
diff --git a/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes2.xml b/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes2.xml
index 2001d59..92d70cb 100644
--- a/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes2.xml
+++ b/tika-core/src/test/resources/org/apache/tika/mime/custom-mimetypes2.xml
@@ -16,7 +16,7 @@
   limitations under the License.
 -->
 <mime-info>
-  <mime-type type="another/world-file">
+  <mime-type type="another/world-file" interpreted="true">
      <hello>kittens</hello>
      <glob pattern="*.hello.world" /> <!-- Will collide with 'hello/world-file'  -->
      <sub-class-of type="hello/world" />


[tika] 03/03: TIKA-2704

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 36fa58f4c298248398749be927e6fdb9868e5676
Author: TALLISON <ta...@apache.org>
AuthorDate: Fri Aug 3 13:49:54 2018 -0400

    TIKA-2704
---
 .../org/apache/tika/parser/mp3/MpegStream.java     | 34 +++++++---------------
 1 file changed, 10 insertions(+), 24 deletions(-)

diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mp3/MpegStream.java b/tika-parsers/src/main/java/org/apache/tika/parser/mp3/MpegStream.java
index 83a9c87..1814c12 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/mp3/MpegStream.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/mp3/MpegStream.java
@@ -16,6 +16,9 @@
  */
 package org.apache.tika.parser.mp3;
 
+import org.apache.poi.util.IOUtils;
+
+import java.io.EOFException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.PushbackInputStream;
@@ -157,7 +160,12 @@ class MpegStream extends PushbackInputStream
     {
         if (currentHeader != null)
         {
-            skipStream(in, currentHeader.getLength() - HEADER_SIZE);
+            long toSkip = currentHeader.getLength() - HEADER_SIZE;
+            long skipped = IOUtils.skipFully(in, toSkip);
+            if (skipped < toSkip) {
+                throw new EOFException("EOF: tried to skip "+toSkip +
+                        " but could only skip "+skipped);
+            }
             currentHeader = null;
             return true;
         }
@@ -267,28 +275,6 @@ class MpegStream extends PushbackInputStream
     }
 
     /**
-     * Skips the given number of bytes from the specified input stream.
-     * 
-     * @param in the input stream
-     * @param count the number of bytes to skip
-     * @throws IOException if an IO error occurs
-     */
-    private static void skipStream(InputStream in, long count)
-            throws IOException
-    {
-        long size = count;
-        long skipped = 0;
-        while (size > 0 && skipped >= 0)
-        {
-            skipped = in.skip(size);
-            if (skipped != -1)
-            {
-                size -= skipped;
-            }
-        }
-    }
-    
-    /**
      * Calculates the bit rate based on the given parameters.
      * 
      * @param mpegVer the MPEG version
@@ -428,7 +414,7 @@ class MpegStream extends PushbackInputStream
          * index. E.g. ''from'' = 0, ''to'' = 3 will return the value of the
          * first 4 bits.
          * 
-         * @param the from index
+         * @param from index
          * @param to the to index
          * @return the value of this group of bits
          */


[tika] 02/03: improve xml reading

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit bd9d75d8b0a85af2937047bfad04288c3044b2a6
Author: TALLISON <ta...@apache.org>
AuthorDate: Fri Aug 3 13:16:09 2018 -0400

    improve xml reading
---
 .../java/org/apache/tika/utils/XMLReaderUtils.java | 97 +++++++++++++++++++++-
 1 file changed, 95 insertions(+), 2 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java b/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
index 10c2274..382be2d 100644
--- a/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
@@ -45,6 +45,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.Serializable;
 import java.io.StringReader;
+import java.lang.reflect.Method;
 import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
@@ -70,6 +71,8 @@ public class XMLReaderUtils implements Serializable {
      */
     private static int POOL_SIZE = 10;
 
+    private static long LAST_LOG = -1;
+
     //TODO: figure out if the rw lock is any better than a simple lock
     private static final ReentrantReadWriteLock SAX_READ_WRITE_LOCK = new ReentrantReadWriteLock();
     private static final ReentrantReadWriteLock DOM_READ_WRITE_LOCK = new ReentrantReadWriteLock();
@@ -138,7 +141,9 @@ public class XMLReaderUtils implements Serializable {
      */
     public static SAXParser getSAXParser() throws TikaException {
         try {
-            return getSAXParserFactory().newSAXParser();
+            SAXParser parser = getSAXParserFactory().newSAXParser();
+            trySetXercesSecurityManager(parser);
+            return parser;
         } catch (ParserConfigurationException e) {
             throw new TikaException("Unable to configure a SAX parser", e);
         } catch (SAXException e) {
@@ -202,6 +207,7 @@ public class XMLReaderUtils implements Serializable {
         trySetSAXFeature(factory, "http://xml.org/sax/features/external-parameter-entities", false);
         trySetSAXFeature(factory, "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
         trySetSAXFeature(factory, "http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
+        trySetXercesSecurityManager(factory);
         return factory;
     }
 
@@ -244,6 +250,7 @@ public class XMLReaderUtils implements Serializable {
         tryToSetStaxProperty(factory, XMLInputFactory.IS_VALIDATING, false);
 
         factory.setXMLResolver(IGNORING_STAX_ENTITY_RESOLVER);
+        trySetStaxSecurityManager(factory);
         return factory;
     }
 
@@ -261,7 +268,7 @@ public class XMLReaderUtils implements Serializable {
         try {
             factory.setProperty(key, value);
         } catch (IllegalArgumentException e) {
-            //swallow
+            LOG.log(Level.WARNING, "StAX Feature unsupported: " + key, e);
         }
     }
 
@@ -499,4 +506,90 @@ public class XMLReaderUtils implements Serializable {
         }
         POOL_SIZE = poolSize;
     }
+
+    private static void trySetXercesSecurityManager(DocumentBuilderFactory factory) {
+        //from POI
+        // Try built-in JVM one first, standalone if not
+        for (String securityManagerClassName : new String[] {
+                //"com.sun.org.apache.xerces.internal.util.SecurityManager",
+                "org.apache.xerces.util.SecurityManager"
+        }) {
+            try {
+                Object mgr = Class.forName(securityManagerClassName).newInstance();
+                Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE);
+                setLimit.invoke(mgr, 4096);
+                factory.setAttribute("http://apache.org/xml/properties/security-manager", mgr);
+                // Stop once one can be setup without error
+                return;
+            } catch (ClassNotFoundException e) {
+                // continue without log, this is expected in some setups
+            } catch (Throwable e) {     // NOSONAR - also catch things like NoClassDefError here
+                // throttle the log somewhat as it can spam the log otherwise
+                if(System.currentTimeMillis() > LAST_LOG + TimeUnit.MINUTES.toMillis(5)) {
+                    LOG.log(Level.WARNING, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e);
+                    LAST_LOG = System.currentTimeMillis();
+                }
+            }
+        }
+
+        // separate old version of Xerces not found => use the builtin way of setting the property
+        try {
+            factory.setAttribute("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit", 4096);
+        } catch (IllegalArgumentException e) {     // NOSONAR - also catch things like NoClassDefError here
+            // throttle the log somewhat as it can spam the log otherwise
+            if(System.currentTimeMillis() > LAST_LOG + TimeUnit.MINUTES.toMillis(5)) {
+                LOG.log(Level.WARNING, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e);
+                LAST_LOG = System.currentTimeMillis();
+            }
+        }
+    }
+
+    private static void trySetXercesSecurityManager(SAXParser parser) {
+        //from POI
+        // Try built-in JVM one first, standalone if not
+        for (String securityManagerClassName : new String[] {
+                //"com.sun.org.apache.xerces.internal.util.SecurityManager",
+                "org.apache.xerces.util.SecurityManager"
+        }) {
+            try {
+                Object mgr = Class.forName(securityManagerClassName).newInstance();
+                Method setLimit = mgr.getClass().getMethod("setEntityExpansionLimit", Integer.TYPE);
+                setLimit.invoke(mgr, 4096);
+                parser.setProperty("http://apache.org/xml/properties/security-manager", mgr);
+                // Stop once one can be setup without error
+                return;
+            } catch (ClassNotFoundException e) {
+                // continue without log, this is expected in some setups
+            } catch (Throwable e) {     // NOSONAR - also catch things like NoClassDefError here
+                // throttle the log somewhat as it can spam the log otherwise
+                if(System.currentTimeMillis() > LAST_LOG + TimeUnit.MINUTES.toMillis(5)) {
+                    LOG.log(Level.WARNING, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e);
+                    LAST_LOG = System.currentTimeMillis();
+                }
+            }
+        }
+
+        // separate old version of Xerces not found => use the builtin way of setting the property
+        try {
+            parser.setProperty("http://www.oracle.com/xml/jaxp/properties/entityExpansionLimit", 4096);
+        } catch (SAXException e) {     // NOSONAR - also catch things like NoClassDefError here
+            // throttle the log somewhat as it can spam the log otherwise
+            if(System.currentTimeMillis() > LAST_LOG + TimeUnit.MINUTES.toMillis(5)) {
+                LOG.log(Level.WARNING, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e);
+                LAST_LOG = System.currentTimeMillis();
+            }
+        }
+    }
+
+    private static void trySetStaxSecurityManager(XMLInputFactory inputFactory) {
+        try {
+            inputFactory.setProperty("com.ctc.wstx.maxEntityCount", 4096);
+        } catch (IllegalArgumentException e) {
+            // throttle the log somewhat as it can spam the log otherwise
+            if(System.currentTimeMillis() > LAST_LOG + TimeUnit.MINUTES.toMillis(5)) {
+                LOG.log(Level.WARNING, "SAX Security Manager could not be setup [log suppressed for 5 minutes]", e);
+                LAST_LOG = System.currentTimeMillis();
+            }
+        }
+    }
 }